From 0e9a9c23a9c04a37e2e4edf5a0b9cd0272ad3164 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Wed, 11 Dec 2024 16:55:46 +0800 Subject: [PATCH] [SME] Revert SME because of performance issue --- ...-and-hip11-in-aarch64-cores.def-to-b.patch | 34 - ...l-frame-vars-in-shrink-wrapping-code.patch | 65 +- ...ch64-Cleanup-CPU-option-processing-c.patch | 336 - ...aarch64-Avoid-a-use-of-callee-offset.patch | 44 +- ...rch64-Cleanup-option-processing-code.patch | 528 - ...andle-frames-with-no-saved-registers.patch | 15 +- ...ch64-Add-march-support-for-Armv9.1-A.patch | 108 - ...bytes-below-saved-regs-to-frame-info.patch | 39 +- ...ert-aarch64-Define-__ARM_FEATURE_RCP.patch | 112 - ...dd-bytes-below-hard-fp-to-frame-info.patch | 23 +- ...ert-Ampere-1-and-Ampere-1A-core-defi.patch | 39 - ...ak-aarch64-save-restore-callee-saves.patch | 49 +- ...ch64-Rename-AARCH64_ISA-architecture.patch | 157 - ...ate-chain-offset-if-there-is-a-chain.patch | 15 +- ...ch64-Rename-AARCH64_FL-architecture-.patch | 220 - ...-locals-offset-to-bytes-above-locals.patch | 27 +- ...ch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch | 398 - ...ard-fp-offset-to-bytes-above-hard-fp.patch | 37 +- ...ch64-Add-V-to-aarch64-arches.def-nam.patch | 315 - ...112-aarch64-Tweak-frame-size-comment.patch | 18 +- ...ME-aarch64-Small-config.gcc-cleanups.patch | 55 - ...-offset-from-the-bottom-of-the-frame.patch | 41 +- ...ch64-Avoid-redundancy-in-aarch64-cor.patch | 273 - ...h64-Simplify-top-of-frame-allocation.patch | 15 +- ...ch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch | 83 - ...rch64-Minor-initial-adjustment-tweak.patch | 15 +- ...ch64-Fix-transitive-closure-of-featu.patch | 154 - ...Tweak-stack-clash-boundary-condition.patch | 17 +- ...ch64-Reorder-an-entry-in-aarch64-opt.patch | 194 - ...-Put-LR-save-probe-in-first-16-bytes.patch | 27 +- ...aarch64-Simplify-feature-definitions.patch | 1176 -- ...lify-probe-of-final-frame-allocation.patch | 23 +- ...ch64-Simplify-generation-of-.arch-st.patch | 467 - 
...record-probe-registers-in-frame-info.patch | 39 +- ...ch64-Avoid-std-string-in-static-data.patch | 43 - ...Remove-below-hard-fp-saved-regs-size.patch | 35 +- ...ch64-Tweak-constness-of-option-relat.patch | 195 - ...smash-canary-protect-saved-registers.patch | 35 +- ...ch64-Make-more-use-of-aarch64_featur.patch | 394 - ...rn-register-handling-in-untyped_call.patch | 0 ...ch64-Tweak-contents-of-flags_on-off-.patch | 70 - ... 0123-aarch64-Fix-loose-ldpstp-check.patch | 0 ...ch64-Tweak-handling-of-mgeneral-regs.patch | 370 - ...ch64-Remove-redundant-TARGET_-checks.patch | 453 - ...ME-aarch64-Define-__ARM_FEATURE_RCPC.patch | 132 - ...-Ampere-1-and-Ampere-1A-core-definit.patch | 29 - ...ch64-Fix-nosimd-handling-of-FPR-move.patch | 968 - ...-aarch64-Commonise-some-folding-code.patch | 83 - ...ch64-Add-a-Z-operand-modifier-for-SV.patch | 49 - ...e-switching-Remove-unused-bbnum-fiel.patch | 104 - ...e-switching-Tweak-the-macro-hook-doc.patch | 311 - ...-SME-mode-switching-Add-note-problem.patch | 35 - ...e-switching-Avoid-quadractic-list-op.patch | 90 - ...e-switching-Fix-the-mode-passed-to-t.patch | 136 - ...e-switching-Simplify-recording-of-tr.patch | 103 - ...e-switching-Tweak-entry-exit-handlin.patch | 92 - ...e-switching-Allow-targets-to-set-the.patch | 93 - ...e-switching-Pass-set-of-live-registe.patch | 211 - ...e-switching-Pass-the-set-of-live-reg.patch | 177 - ...e-switching-Use-1-based-edge-aux-fie.patch | 56 - ...e-switching-Add-a-target-configurabl.patch | 337 - ...E-mode-switching-Add-a-backprop-hook.patch | 483 - ...ch64-Add-a-result_mode-helper-functi.patch | 81 - ...-Try-to-remove-EH-edges-after-pro-ep.patch | 232 - ...-PR-middle-end-107705-ICE-after-recl.patch | 71 - ...ction-Change-return-type-of-predicat.patch | 351 - ...ow-prologues-and-epilogues-to-be-ins.patch | 233 - ...-a-target-hook-for-sibcall-epilogues.patch | 239 - ...-a-new-target-hook-TARGET_START_CALL.patch | 461 - ...ME-Allow-targets-to-add-USEs-to-asms.patch | 490 - 
...-compact-syntax-for-insn-and-insn_sp.patch | 998 - ...og-Improve-parser-for-pattern-new-co.patch | 104 - ...port-SME-recog-Support-space-in-cons.patch | 49 - ...ch64-Generalise-require_immediate_la.patch | 164 - ...-aarch64-Add-backend-support-for-DFP.patch | 469 - ...aarch64-Vector-move-fixes-for-nosimd.patch | 1824 -- ...ch64-Simplify-output-template-emissi.patch | 213 - ...Improve-immediate-expansion-PR106583.patch | 631 - ...-AArch64-Cleanup-move-immediate-code.patch | 410 - ...ch64-convert-some-patterns-to-compac.patch | 229 - ...E-aarch64-Use-SVE-s-RDVL-instruction.patch | 792 - ...ch64-Make-AARCH64_FL_SVE-requirement.patch | 137 - ...ch64-Add-group-suffixes-to-SVE-intri.patch | 562 - ...ch64-Add-sve_type-to-SVE-builtins-co.patch | 230 - ...ch64-Generalise-some-SVE-ACLE-error-.patch | 1474 -- ...ch64-Replace-vague-previous-argument.patch | 698 - ...ch64-Make-more-use-of-sve_type-in-AC.patch | 368 - ...ch64-Tweak-error-message-for-tuple-v.patch | 106 - ...ch64-Add-tuple-forms-of-svreinterpre.patch | 1236 -- ...ribs-Use-existing-traits-for-excl_ha.patch | 90 - ...ow-target-attributes-in-non-gnu-name.patch | 2369 --- ...ME-aarch64-Fix-plugin-header-install.patch | 64 - ...ch64-Add-arm_streaming-_compatible-a.patch | 1178 -- 0175-Backport-SME-aarch64-Add-sme.patch | 330 - ...ch64-Add-r-m-and-m-r-alternatives-to.patch | 168 - ...ch64-Rewrite-simd-move-immediate-pat.patch | 167 - ...ch64-remove-test-comment-from-mov-mo.patch | 34 - ...ch64-Distinguish-streaming-compatibl.patch | 1552 -- ...ch64-Mark-relevant-SVE-instructions-.patch | 4506 ----- ...ME-AArch64-Support-new-tbranch-optab.patch | 250 - ...aarch64-Robustify-stack-tie-handling.patch | 126 - ...-Handle-epilogues-that-contain-jumps.patch | 201 - ...ch64-Use-vecs-to-store-register-save.patch | 709 - ...ch64-Put-LR-save-slot-first-in-more-.patch | 107 - ...arch64-Switch-PSTATE.SM-around-calls.patch | 3270 ---- ...ch64-Add-support-for-SME-ZA-attribut.patch | 4324 ----- 
...ch64-Add-a-register-class-for-w12-w1.patch | 103 - ...ckport-SME-aarch64-Add-a-VNx1TI-mode.patch | 72 - ...ch64-Generalise-unspec_based_functio.patch | 118 - ...ch64-Generalise-_m-rules-for-SVE-int.patch | 117 - ...ME-aarch64-Add-support-for-arm_sme.h.patch | 15955 ---------------- ...ch64-Add-support-for-__arm_locally_s.patch | 1748 -- ...ch64-Handle-PSTATE.SM-across-abnorma.patch | 708 - ...ch64-Enforce-inlining-restrictions-f.patch | 913 - ...ch64-Update-sibcall-handling-for-SME.patch | 424 - ...gcc-aarch64-Configure-check-for-.var.patch | 117 - ...gcc-aarch64-Configure-check-for-__ge.patch | 117 - ...bgcc-aarch64-Add-SME-runtime-support.patch | 627 - ...gcc-aarch64-Add-SME-unwinder-support.patch | 70 - 0220-Backport-SME-libgcc-Fix-config.in.patch | 51 - ...ch64-Add-funwind-tables-to-some-test.patch | 54 - ...ch64-Skip-some-SME-register-save-tes.patch | 106 - ...t-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch | 37 - 0224-Backport-SME-aarch64-Add-V1DI-mode.patch | 177 - ...ow-md-iterators-to-include-other-ite.patch | 217 - ...cv-Add-support-for-strlen-inline-exp.patch | 142 - ...ribs-Add-overloads-with-namespace-na.patch | 189 - ...-Add-array_slice-constructors-from-n.patch | 47 - ...-SME-A-couple-of-va_gc_atomic-tweaks.patch | 140 - ...dle-end-Fix-issue-of-poly_uint16-1-1.patch | 34 - ...dd-missing-header-file-in-aarch64.cc.patch | 24 - ...-SME-c-Add-support-for-__extension__.patch | 327 - ...-Updates-of-biggest-mode-for-hard-re.patch | 140 - ...Support-C2x-empty-initializer-braces.patch | 672 - ...ch64-Update-sizeless-tests-for-recen.patch | 115 - ...ribs-Namespace-aware-lookup_attribut.patch | 58 - ...amily-ICE-with-gnu-nocf_check-PR1069.patch | 281 - ...ch64-Fix-assert-in-aarch64_move_imm-.patch | 35 - ...tsuite-Only-run-fcf-protection-test-.patch | 37 - ...-PRs-106764-106765-and-107307-all-IC.patch | 113 - ...ch64-Remove-expected-error-for-compo.patch | 43 - ...rch64-Remove-redundant-builtins-code.patch | 264 - ...ch64-Fix-Armv9-a-warnings-that-get-e.patch | 
63 - ...onicalize-X-Y-as-X-Y-in-match.pd-whe.patch | 184 - ...dle-end-Add-new-tbranch-optab-to-add.patch | 417 - ...low-Allow-dynamic-allocations-after-.patch | 110 - ...05169-Fix-references-to-discarded-se.patch | 225 - ...C-V-autovec-Verify-that-GET_MODE_NUN.patch | 53 - ...-operator-to-gimple_stmt_iterator-an.patch | 42 - ...e-optimization-110221-SLP-and-loop-m.patch | 75 - 0251-SME-Adapt-some-testsuites.patch | 116 - ...-backported-patches-and-IPA-prefetch.patch | 43 - ...hip09-and-hip11-in-aarch64-cores.def.patch | 32 - ...o-use-AI-model-to-guide-optimization.patch | 4 +- ...f-stream-in-functions-for-pre-versio.patch | 4 +- ...n-lto-symbol-parse-cross-lto-units-i.patch | 8 +- 0326-BUGFIX-Fix-build-error-on-risv_64.patch | 802 +- gcc.spec | 321 +- 158 files changed, 691 insertions(+), 68739 deletions(-) delete mode 100644 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch rename 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch => 0103-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch (87%) delete mode 100644 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch rename 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch => 0104-aarch64-Avoid-a-use-of-callee-offset.patch (61%) delete mode 100644 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch rename 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch => 0105-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch (80%) delete mode 100644 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch rename 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch => 0106-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch (90%) delete mode 100644 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch rename 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch => 0107-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch (83%) delete mode 100644 
0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch rename 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch => 0108-aarch64-Tweak-aarch64-save-restore-callee-saves.patch (86%) delete mode 100644 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch rename 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch => 0109-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch (77%) delete mode 100644 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch rename 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch => 0110-aarch64-Rename-locals-offset-to-bytes-above-locals.patch (80%) delete mode 100644 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch rename 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch => 0111-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch (84%) delete mode 100644 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch rename 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch => 0112-aarch64-Tweak-frame-size-comment.patch (67%) delete mode 100644 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch rename 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch => 0113-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch (86%) delete mode 100644 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch rename 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch => 0114-aarch64-Simplify-top-of-frame-allocation.patch (82%) delete mode 100644 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch rename 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch => 0115-aarch64-Minor-initial-adjustment-tweak.patch (75%) delete mode 100644 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch rename 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch => 0116-aarch64-Tweak-stack-clash-boundary-condition.patch (90%) delete mode 100644 
0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch rename 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch => 0117-aarch64-Put-LR-save-probe-in-first-16-bytes.patch (95%) delete mode 100644 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch rename 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch => 0118-aarch64-Simplify-probe-of-final-frame-allocation.patch (87%) delete mode 100644 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch rename 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch => 0119-aarch64-Explicitly-record-probe-registers-in-frame-info.patch (91%) delete mode 100644 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch rename 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch => 0120-aarch64-Remove-below-hard-fp-saved-regs-size.patch (87%) delete mode 100644 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch rename 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch => 0121-aarch64-Make-stack-smash-canary-protect-saved-registers.patch (90%) delete mode 100644 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch rename 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch => 0122-aarch64-Fix-return-register-handling-in-untyped_call.patch (100%) delete mode 100644 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch rename 0254-aarch64-Fix-loose-ldpstp-check.patch => 0123-aarch64-Fix-loose-ldpstp-check.patch (100%) delete mode 100644 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch delete mode 100644 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch delete mode 100644 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch delete mode 100644 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch delete mode 100644 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch delete mode 100644 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch 
delete mode 100644 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch delete mode 100644 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch delete mode 100644 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch delete mode 100644 0133-Backport-SME-mode-switching-Add-note-problem.patch delete mode 100644 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch delete mode 100644 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch delete mode 100644 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch delete mode 100644 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch delete mode 100644 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch delete mode 100644 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch delete mode 100644 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch delete mode 100644 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch delete mode 100644 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch delete mode 100644 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch delete mode 100644 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch delete mode 100644 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch delete mode 100644 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch delete mode 100644 0147-Backport-SME-function-Change-return-type-of-predicat.patch delete mode 100644 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch delete mode 100644 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch delete mode 100644 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch delete mode 100644 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch delete mode 100644 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch delete mode 100644 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch delete mode 100644 
0154-Backport-SME-recog-Support-space-in-cons.patch delete mode 100644 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch delete mode 100644 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch delete mode 100644 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch delete mode 100644 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch delete mode 100644 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch delete mode 100644 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch delete mode 100644 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch delete mode 100644 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch delete mode 100644 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch delete mode 100644 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch delete mode 100644 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch delete mode 100644 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch delete mode 100644 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch delete mode 100644 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch delete mode 100644 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch delete mode 100644 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch delete mode 100644 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch delete mode 100644 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch delete mode 100644 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch delete mode 100644 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch delete mode 100644 0175-Backport-SME-aarch64-Add-sme.patch delete mode 100644 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch delete mode 100644 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch delete mode 100644 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch 
delete mode 100644 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch delete mode 100644 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch delete mode 100644 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch delete mode 100644 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch delete mode 100644 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch delete mode 100644 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch delete mode 100644 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch delete mode 100644 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch delete mode 100644 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch delete mode 100644 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch delete mode 100644 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch delete mode 100644 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch delete mode 100644 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch delete mode 100644 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch delete mode 100644 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch delete mode 100644 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch delete mode 100644 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch delete mode 100644 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch delete mode 100644 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch delete mode 100644 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch delete mode 100644 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch delete mode 100644 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch delete mode 100644 0220-Backport-SME-libgcc-Fix-config.in.patch delete mode 100644 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch delete mode 100644 
0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch delete mode 100644 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch delete mode 100644 0224-Backport-SME-aarch64-Add-V1DI-mode.patch delete mode 100644 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch delete mode 100644 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch delete mode 100644 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch delete mode 100644 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch delete mode 100644 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch delete mode 100644 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch delete mode 100644 0231-SME-Add-missing-header-file-in-aarch64.cc.patch delete mode 100644 0232-Backport-SME-c-Add-support-for-__extension__.patch delete mode 100644 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch delete mode 100644 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch delete mode 100644 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch delete mode 100644 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch delete mode 100644 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch delete mode 100644 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch delete mode 100644 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch delete mode 100644 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch delete mode 100644 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch delete mode 100644 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch delete mode 100644 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch delete mode 100644 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch delete mode 100644 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch delete mode 100644 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch 
delete mode 100644 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch delete mode 100644 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch delete mode 100644 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch delete mode 100644 0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch delete mode 100644 0251-SME-Adapt-some-testsuites.patch delete mode 100644 0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch delete mode 100644 0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch diff --git a/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch b/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch deleted file mode 100644 index 5589f82..0000000 --- a/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Fri, 23 Aug 2024 15:14:04 +0800 -Subject: [PATCH 004/157] [SME] Remove hip09 and hip11 in aarch64-cores.def to - backport SME - -Will apply it in the end. ---- - gcc/config/aarch64/aarch64-cores.def | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 601b72abb..70b11eb80 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F - - /* HiSilicon ('H') cores. */ - AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) --AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) - - /* ARMv8.3-A Architecture Processors. 
*/ - -@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | - AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) - - AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) --AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) - - AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) - AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) --- -2.33.0 - diff --git a/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch b/0103-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch similarity index 87% rename from 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch rename to 0103-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch index 257cdb3..fe6e175 100644 --- a/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch +++ b/0103-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch @@ -1,10 +1,7 @@ -From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001 +From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:04 +0100 -Subject: [PATCH 083/157] [Backport][SME] aarch64: Use local frame vars in - shrink-wrapping code - -Reference: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd +Date: Tue, 12 Sep 2023 16:08:48 +0100 +Subject: [PATCH] aarch64: Use local frame vars in shrink-wrapping code aarch64_layout_frame uses a shorthand for referring to cfun->machine->frame: @@ -27,10 +24,10 @@ gcc/ 1 file changed, 64 insertions(+), 59 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 08a98f8ba..b7da1d0be 100644 +index 226dc9dffd47..ae42ffdedbeb 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void) +@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void) frame.is_scs_enabled = (!crtl->calls_eh_return && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) @@ -39,7 +36,7 @@ index 08a98f8ba..b7da1d0be 100644 /* When shadow call stack is enabled, the scs_pop in the epilogue will restore x30, and we don't need to pop x30 again in the traditional -@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, unsigned start, unsigned limit, bool skip_wb, bool hard_fp_valid_p) { @@ -47,7 +44,7 @@ index 08a98f8ba..b7da1d0be 100644 rtx_insn *insn; unsigned regno; unsigned regno2; -@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); if (skip_wb @@ -58,7 +55,7 @@ index 08a98f8ba..b7da1d0be 100644 continue; if (cfun->machine->reg_is_wrapped_separately[regno]) -@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8786,7 +8787,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); @@ -67,7 +64,7 @@ index 08a98f8ba..b7da1d0be 100644 rtx base_rtx = stack_pointer_rtx; poly_int64 sp_offset = offset; -@@ -9399,7 +9400,7 @@ 
aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, { gcc_assert (known_eq (start_offset, 0)); poly_int64 fp_offset @@ -76,7 +73,7 @@ index 08a98f8ba..b7da1d0be 100644 if (hard_fp_valid_p) base_rtx = hard_frame_pointer_rtx; else -@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit && !cfun->machine->reg_is_wrapped_separately[regno2] && known_eq (GET_MODE_SIZE (mode), @@ -86,7 +83,7 @@ index 08a98f8ba..b7da1d0be 100644 { rtx reg2 = gen_rtx_REG (mode, regno2); rtx mem2; -@@ -9472,6 +9472,7 @@ static void +@@ -8872,6 +8872,7 @@ static void aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, unsigned limit, bool skip_wb, rtx *cfi_ops) { @@ -94,7 +91,7 @@ index 08a98f8ba..b7da1d0be 100644 unsigned regno; unsigned regno2; poly_int64 offset; -@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, +@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, rtx reg, mem; if (skip_wb @@ -111,7 +108,7 @@ index 08a98f8ba..b7da1d0be 100644 rtx base_rtx = stack_pointer_rtx; if (mode == VNx2DImode && BYTES_BIG_ENDIAN) aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, +@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit && !cfun->machine->reg_is_wrapped_separately[regno2] && known_eq (GET_MODE_SIZE (mode), @@ -121,7 +118,7 @@ index 08a98f8ba..b7da1d0be 100644 { rtx reg2 = gen_rtx_REG (mode, regno2); rtx mem2; -@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) +@@ -9011,6 +9011,7 @@ 
offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) static sbitmap aarch64_get_separate_components (void) { @@ -129,7 +126,7 @@ index 08a98f8ba..b7da1d0be 100644 sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); bitmap_clear (components); -@@ -9627,18 +9628,18 @@ aarch64_get_separate_components (void) +@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void) if (mode == VNx2DImode && BYTES_BIG_ENDIAN) continue; @@ -151,7 +148,7 @@ index 08a98f8ba..b7da1d0be 100644 else offset += crtl->outgoing_args_size; -@@ -9657,11 +9658,11 @@ aarch64_get_separate_components (void) +@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void) /* If the spare predicate register used by big-endian SVE code is call-preserved, it must be saved in the main prologue before any saves that use it. */ @@ -167,7 +164,7 @@ index 08a98f8ba..b7da1d0be 100644 /* If registers have been chosen to be stored/restored with writeback don't interfere with them to avoid having to output explicit stack adjustment instructions. */ -@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) +@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start) static void aarch64_process_components (sbitmap components, bool prologue_p) { @@ -175,7 +172,7 @@ index 08a98f8ba..b7da1d0be 100644 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed ? 
HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM); -@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p) machine_mode mode = aarch64_reg_save_mode (regno); rtx reg = gen_rtx_REG (mode, regno); @@ -187,7 +184,7 @@ index 08a98f8ba..b7da1d0be 100644 else offset += crtl->outgoing_args_size; -@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p) break; } @@ -204,7 +201,7 @@ index 08a98f8ba..b7da1d0be 100644 GET_MODE_SIZE (mode))) { insn = emit_insn (set); -@@ -9840,7 +9842,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9240,7 +9242,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) /* REGNO2 can be saved/restored in a pair with REGNO. */ rtx reg2 = gen_rtx_REG (mode, regno2); if (frame_pointer_needed) @@ -213,7 +210,7 @@ index 08a98f8ba..b7da1d0be 100644 else offset2 += crtl->outgoing_args_size; rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); -@@ -9935,6 +9937,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9335,6 +9337,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, bool frame_related_p, bool final_adjustment_p) { @@ -221,7 +218,7 @@ index 08a98f8ba..b7da1d0be 100644 HOST_WIDE_INT guard_size = 1 << param_stack_clash_protection_guard_size; HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; -@@ -9955,25 +9958,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9355,25 +9358,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, register as a probe. We can't assume that LR was saved at position 0 though, so treat any space below it as unprobed. 
*/ if (final_adjustment_p @@ -253,7 +250,7 @@ index 08a98f8ba..b7da1d0be 100644 if (known_eq (frame_size, 0)) { -@@ -10262,17 +10265,18 @@ aarch64_epilogue_uses (int regno) +@@ -9662,17 +9665,18 @@ aarch64_epilogue_uses (int regno) void aarch64_expand_prologue (void) { @@ -282,7 +279,7 @@ index 08a98f8ba..b7da1d0be 100644 rtx_insn *insn; if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) -@@ -10303,7 +10307,7 @@ aarch64_expand_prologue (void) +@@ -9703,7 +9707,7 @@ aarch64_expand_prologue (void) } /* Push return address to shadow call stack. */ @@ -291,7 +288,7 @@ index 08a98f8ba..b7da1d0be 100644 emit_insn (gen_scs_push ()); if (flag_stack_usage_info) -@@ -10342,7 +10346,7 @@ aarch64_expand_prologue (void) +@@ -9742,7 +9746,7 @@ aarch64_expand_prologue (void) /* The offset of the frame chain record (if any) from the current SP. */ poly_int64 chain_offset = (initial_adjust + callee_adjust @@ -300,9 +297,9 @@ index 08a98f8ba..b7da1d0be 100644 gcc_assert (known_ge (chain_offset, 0)); /* The offset of the bottom of the save area from the current SP. */ -@@ -10445,16 +10449,17 @@ aarch64_use_return_insn_p (void) +@@ -9845,16 +9849,17 @@ aarch64_use_return_insn_p (void) void - aarch64_expand_epilogue (rtx_call_insn *sibcall) + aarch64_expand_epilogue (bool for_sibcall) { - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; @@ -327,7 +324,7 @@ index 08a98f8ba..b7da1d0be 100644 ? R29_REGNUM : R30_REGNUM); rtx cfi_ops = NULL; rtx_insn *insn; -@@ -10488,7 +10493,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9888,7 +9893,7 @@ aarch64_expand_epilogue (bool for_sibcall) /* We need to add memory barrier to prevent read from deallocated stack. */ bool need_barrier_p = maybe_ne (get_frame_size () @@ -336,7 +333,7 @@ index 08a98f8ba..b7da1d0be 100644 /* Emit a barrier to prevent loads from a deallocated stack. 
*/ if (maybe_gt (final_adjust, crtl->outgoing_args_size) -@@ -10569,7 +10574,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9969,7 +9974,7 @@ aarch64_expand_epilogue (bool for_sibcall) } /* Pop return address from shadow call stack. */ @@ -345,7 +342,7 @@ index 08a98f8ba..b7da1d0be 100644 { machine_mode mode = aarch64_reg_save_mode (R30_REGNUM); rtx reg = gen_rtx_REG (mode, R30_REGNUM); -@@ -13023,24 +13028,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +@@ -12564,24 +12569,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) poly_int64 aarch64_initial_elimination_offset (unsigned from, unsigned to) { @@ -377,5 +374,5 @@ index 08a98f8ba..b7da1d0be 100644 -- -2.33.0 +2.43.5 diff --git a/0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch b/0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch deleted file mode 100644 index 1f506ac..0000000 --- a/0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch +++ /dev/null @@ -1,336 +0,0 @@ -From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001 -From: Wilco Dijkstra -Date: Wed, 18 May 2022 16:02:12 +0100 -Subject: [PATCH 005/157] [Backport][SME] AArch64: Cleanup CPU option - processing code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f - -The --with-cpu/--with-arch configure option processing not only checks valid -arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask. -This isn't used however since a --with-cpu is translated into a -mcpu option -which is processed as if written on the command-line (so TARGET_CPU_DEFAULT -is never accessed). - -So remove all the complex processing and bitmask, and just validate the -option. Fix a bug that always reports valid architecture extensions as invalid. -As a result the CPU processing in aarch64.c can be simplified. 
- -gcc/ - * config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch - processing. Add support for architectural extensions. - * config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove - AARCH64_CPU_DEFAULT_FLAGS. - (TARGET_CPU_NBITS): Remove. - (TARGET_CPU_MASK): Remove. - * config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define. - (get_tune_cpu): Assert CPU is always valid. - (get_arch): Assert architecture is always valid. - (aarch64_override_options): Cleanup CPU selection code and simplify logic. - (aarch64_option_restore): Remove unnecessary checks on tune. ---- - gcc/config.gcc | 43 +------------ - gcc/config/aarch64/aarch64.cc | 115 +++++++++------------------------- - gcc/config/aarch64/aarch64.h | 9 +-- - 3 files changed, 32 insertions(+), 135 deletions(-) - -diff --git a/gcc/config.gcc b/gcc/config.gcc -index 8fdde1576..3be450471 100644 ---- a/gcc/config.gcc -+++ b/gcc/config.gcc -@@ -4190,8 +4190,6 @@ case "${target}" in - pattern=AARCH64_CORE - fi - -- ext_mask=AARCH64_CPU_DEFAULT_FLAGS -- - # Find the base CPU or ARCH id in aarch64-cores.def or - # aarch64-arches.def - if [ x"$base_val" = x ] \ -@@ -4199,23 +4197,6 @@ case "${target}" in - ${srcdir}/config/aarch64/$def \ - > /dev/null; then - -- if [ $which = arch ]; then -- base_id=`grep "^$pattern(\"$base_val\"," \ -- ${srcdir}/config/aarch64/$def | \ -- sed -e 's/^[^,]*,[ ]*//' | \ -- sed -e 's/,.*$//'` -- # Extract the architecture flags from aarch64-arches.def -- ext_mask=`grep "^$pattern(\"$base_val\"," \ -- ${srcdir}/config/aarch64/$def | \ -- sed -e 's/)$//' | \ -- sed -e 's/^.*,//'` -- else -- base_id=`grep "^$pattern(\"$base_val\"," \ -- ${srcdir}/config/aarch64/$def | \ -- sed -e 's/^[^,]*,[ ]*//' | \ -- sed -e 's/,.*$//'` -- fi -- - # Disallow extensions in --with-tune=cortex-a53+crc. 
- if [ $which = tune ] && [ x"$ext_val" != x ]; then - echo "Architecture extensions not supported in --with-$which=$val" 1>&2 -@@ -4246,25 +4227,7 @@ case "${target}" in - grep "^\"$base_ext\""` - - if [ x"$base_ext" = x ] \ -- || [[ -n $opt_line ]]; then -- -- # These regexp extract the elements based on -- # their group match index in the regexp. -- ext_canon=`echo -e "$opt_line" | \ -- sed -e "s/$sed_patt/\2/"` -- ext_on=`echo -e "$opt_line" | \ -- sed -e "s/$sed_patt/\3/"` -- ext_off=`echo -e "$opt_line" | \ -- sed -e "s/$sed_patt/\4/"` -- -- if [ $ext = $base_ext ]; then -- # Adding extension -- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")" -- else -- # Removing extension -- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")" -- fi -- -+ || [ x"$opt_line" != x ]; then - true - else - echo "Unknown extension used in --with-$which=$val" 1>&2 -@@ -4273,10 +4236,6 @@ case "${target}" in - ext_val=`echo $ext_val | sed -e 's/[a-z0-9]\+//'` - done - -- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)" -- if [ x"$base_id" != x ]; then -- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask" -- fi - true - else - # Allow --with-$which=native. -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 7c62ddb2a..ba888beb0 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table[] = - { NULL, 0, 0, false, false, false, false, NULL, NULL } - }; - --#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) -- - /* An ISA extension in the co-processor and main instruction set space. */ - struct aarch64_option_extension - { -@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res) - return false; - } - --static_assert (TARGET_CPU_generic < TARGET_CPU_MASK, -- "TARGET_CPU_NBITS is big enough"); -- --/* Return the CPU corresponding to the enum CPU. 
-- If it doesn't specify a cpu, return the default. */ -+/* Return the CPU corresponding to the enum CPU. */ - - static const struct processor * - aarch64_get_tune_cpu (enum aarch64_processor cpu) - { -- if (cpu != aarch64_none) -- return &all_cores[cpu]; -+ gcc_assert (cpu != aarch64_none); - -- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that -- encode the default cpu as selected by the --with-cpu GCC configure option -- in config.gcc. -- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS -- flags mechanism should be reworked to make it more sane. */ -- return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK]; -+ return &all_cores[cpu]; - } - --/* Return the architecture corresponding to the enum ARCH. -- If it doesn't specify a valid architecture, return the default. */ -+/* Return the architecture corresponding to the enum ARCH. */ - - static const struct processor * - aarch64_get_arch (enum aarch64_arch arch) - { -- if (arch != aarch64_no_arch) -- return &all_architectures[arch]; -- -- const struct processor *cpu -- = &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK]; -+ gcc_assert (arch != aarch64_no_arch); - -- return &all_architectures[cpu->arch]; -+ return &all_architectures[arch]; - } - - /* Return the VG value associated with -msve-vector-bits= value VALUE. */ -@@ -18481,10 +18464,6 @@ aarch64_override_options (void) - uint64_t arch_isa = 0; - aarch64_isa_flags = 0; - -- bool valid_cpu = true; -- bool valid_tune = true; -- bool valid_arch = true; -- - selected_cpu = NULL; - selected_arch = NULL; - selected_tune = NULL; -@@ -18499,77 +18478,56 @@ aarch64_override_options (void) - If either of -march or -mtune is given, they override their - respective component of -mcpu. 
*/ - if (aarch64_cpu_string) -- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, -- &cpu_isa); -+ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); - - if (aarch64_arch_string) -- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch, -- &arch_isa); -+ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); - - if (aarch64_tune_string) -- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune); -+ aarch64_validate_mtune (aarch64_tune_string, &selected_tune); - - #ifdef SUBTARGET_OVERRIDE_OPTIONS - SUBTARGET_OVERRIDE_OPTIONS; - #endif - -- /* If the user did not specify a processor, choose the default -- one for them. This will be the CPU set during configuration using -- --with-cpu, otherwise it is "generic". */ -- if (!selected_cpu) -- { -- if (selected_arch) -- { -- selected_cpu = &all_cores[selected_arch->ident]; -- aarch64_isa_flags = arch_isa; -- explicit_arch = selected_arch->arch; -- } -- else -- { -- /* Get default configure-time CPU. */ -- selected_cpu = aarch64_get_tune_cpu (aarch64_none); -- aarch64_isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS; -- } -- -- if (selected_tune) -- explicit_tune_core = selected_tune->ident; -- } -- /* If both -mcpu and -march are specified check that they are architecturally -- compatible, warn if they're not and prefer the -march ISA flags. */ -- else if (selected_arch) -+ if (selected_cpu && selected_arch) - { -+ /* If both -mcpu and -march are specified, warn if they are not -+ architecturally compatible and prefer the -march ISA flags. */ - if (selected_arch->arch != selected_cpu->arch) - { - warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch", - aarch64_cpu_string, - aarch64_arch_string); - } -+ - aarch64_isa_flags = arch_isa; -- explicit_arch = selected_arch->arch; -- explicit_tune_core = selected_tune ? 
selected_tune->ident -- : selected_cpu->ident; - } -- else -+ else if (selected_cpu) - { -- /* -mcpu but no -march. */ -- aarch64_isa_flags = cpu_isa; -- explicit_tune_core = selected_tune ? selected_tune->ident -- : selected_cpu->ident; -- gcc_assert (selected_cpu); - selected_arch = &all_architectures[selected_cpu->arch]; -- explicit_arch = selected_arch->arch; -+ aarch64_isa_flags = cpu_isa; - } -- -- /* Set the arch as well as we will need it when outputing -- the .arch directive in assembly. */ -- if (!selected_arch) -+ else if (selected_arch) - { -- gcc_assert (selected_cpu); -+ selected_cpu = &all_cores[selected_arch->ident]; -+ aarch64_isa_flags = arch_isa; -+ } -+ else -+ { -+ /* No -mcpu or -march specified, so use the default CPU. */ -+ selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; - selected_arch = &all_architectures[selected_cpu->arch]; -+ aarch64_isa_flags = selected_cpu->flags; - } - -+ explicit_arch = selected_arch->arch; - if (!selected_tune) - selected_tune = selected_cpu; -+ explicit_tune_core = selected_tune->ident; -+ -+ gcc_assert (explicit_tune_core != aarch64_none); -+ gcc_assert (explicit_arch != aarch64_no_arch); - - if (aarch64_enable_bti == 2) - { -@@ -18605,15 +18563,6 @@ aarch64_override_options (void) - if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32) - sorry ("return address signing is only supported for %<-mabi=lp64%>"); - -- /* Make sure we properly set up the explicit options. */ -- if ((aarch64_cpu_string && valid_cpu) -- || (aarch64_tune_string && valid_tune)) -- gcc_assert (explicit_tune_core != aarch64_none); -- -- if ((aarch64_cpu_string && valid_cpu) -- || (aarch64_arch_string && valid_arch)) -- gcc_assert (explicit_arch != aarch64_no_arch); -- - /* The pass to insert speculation tracking runs before - shrink-wrapping and the latter does not know how to update the - tracking status. So disable it in this case. 
*/ -@@ -18719,11 +18668,7 @@ aarch64_option_restore (struct gcc_options *opts, - opts->x_explicit_arch = ptr->x_explicit_arch; - selected_arch = aarch64_get_arch (ptr->x_explicit_arch); - opts->x_explicit_tune_core = ptr->x_explicit_tune_core; -- if (opts->x_explicit_tune_core == aarch64_none -- && opts->x_explicit_arch != aarch64_no_arch) -- selected_tune = &all_cores[selected_arch->ident]; -- else -- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); -+ selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); - opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; - opts->x_aarch64_branch_protection_string - = ptr->x_aarch64_branch_protection_string; -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 6834c3e99..14e2af054 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -811,16 +811,9 @@ enum target_cpus - TARGET_CPU_generic - }; - --/* Define how many bits are used to represent the CPU in TARGET_CPU_DEFAULT. -- This needs to be big enough to fit the value of TARGET_CPU_generic. -- All bits after this are used to represent the AARCH64_CPU_DEFAULT_FLAGS. */ --#define TARGET_CPU_NBITS 8 --#define TARGET_CPU_MASK ((1 << TARGET_CPU_NBITS) - 1) -- - /* If there is no CPU defined at configure, use generic as default. 
*/ - #ifndef TARGET_CPU_DEFAULT --#define TARGET_CPU_DEFAULT \ -- (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS)) -+# define TARGET_CPU_DEFAULT TARGET_CPU_generic - #endif - - /* If inserting NOP before a mult-accumulate insn remember to adjust the --- -2.33.0 - diff --git a/0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch b/0104-aarch64-Avoid-a-use-of-callee-offset.patch similarity index 61% rename from 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch rename to 0104-aarch64-Avoid-a-use-of-callee-offset.patch index 7a2c9e2..444c649 100644 --- a/0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch +++ b/0104-aarch64-Avoid-a-use-of-callee-offset.patch @@ -1,9 +1,7 @@ -From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001 +From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:04 +0100 -Subject: [PATCH] [Backport][SME] aarch64: Avoid a use of callee_offset - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: [PATCH] aarch64: Avoid a use of callee_offset When we emit the frame chain, i.e. when we reach Here in this statement of aarch64_expand_prologue: @@ -49,17 +47,10 @@ gcc/ 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index b7da1d0be..fbd7a079a 100644 +index ae42ffdedbeb..79253322fd7c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno) - current FP is also set up if it is in use. 
*/ - - void - aarch64_expand_prologue (void) - { - aarch64_frame &frame = cfun->machine->frame; - poly_int64 frame_size = frame.frame_size; +@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void) poly_int64 initial_adjust = frame.initial_adjust; HOST_WIDE_INT callee_adjust = frame.callee_adjust; poly_int64 final_adjust = frame.final_adjust; @@ -67,21 +58,7 @@ index b7da1d0be..fbd7a079a 100644 poly_int64 sve_callee_adjust = frame.sve_callee_adjust; poly_int64 below_hard_fp_saved_regs_size = frame.below_hard_fp_saved_regs_size; - unsigned reg1 = frame.wb_push_candidate1; - unsigned reg2 = frame.wb_push_candidate2; - bool emit_frame_chain = frame.emit_frame_chain; - rtx_insn *insn; - - if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) - { -@@ -10376,22 +10375,21 @@ aarch64_expand_prologue (void) - the CFA based on the frame pointer. We therefore need new - DW_CFA_expressions to re-express the save slots with addresses - based on the frame pointer. */ - rtx_insn *insn = get_last_insn (); - gcc_assert (RTX_FRAME_RELATED_P (insn)); - - /* Add an explicit CFA definition if this was previously +@@ -9783,8 +9782,7 @@ aarch64_expand_prologue (void) implicit. */ if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) { @@ -91,13 +68,6 @@ index b7da1d0be..fbd7a079a 100644 add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (hard_frame_pointer_rtx, src)); } - - /* Change the save slot expressions for the registers that - we've already saved. 
*/ - aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], - hard_frame_pointer_rtx, UNITS_PER_WORD); - aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], - hard_frame_pointer_rtx, 0); -- -2.38.1.windows.1 +2.43.5 diff --git a/0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch b/0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch deleted file mode 100644 index c945c81..0000000 --- a/0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch +++ /dev/null @@ -1,528 +0,0 @@ -From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001 -From: Wilco Dijkstra -Date: Wed, 1 Jun 2022 16:46:36 +0100 -Subject: [PATCH 006/157] [Backport][SME] AArch64: Cleanup option processing - code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1 - -Further cleanup option processing. Remove the duplication of global -variables for CPU and tune settings so that CPU option processing is -simplified even further. Move global variables that need save and -restore due to target option processing into aarch64.opt. This removes -the need for explicit saving/restoring and unnecessary reparsing of -options. - -gcc/ - * config/aarch64/aarch64.opt (explicit_tune_core): Rename to - selected_tune. - (explicit_arch): Rename to selected_arch. - (x_aarch64_override_tune_string): Remove. - (aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored. - (aarch64_override_tune_string): Add Save so it gets saved/restored. - * config/aarch64/aarch64.h (aarch64_architecture_version): Remove. - * config/aarch64/aarch64.cc (aarch64_architecture_version): Remove. - (processor): Remove archtecture_version field. - (selected_arch): Remove global. - (selected_cpu): Remove global. - (selected_tune): Remove global. - (aarch64_ra_sign_key): Move global to aarch64.opt so it is saved. - (aarch64_override_options_internal): Use aarch64_get_tune_cpu. 
- (aarch64_override_options): Further simplify code to only set - selected_arch and selected_tune globals. - (aarch64_option_save): Remove now that target options are saved. - (aarch64_option_restore): Remove redundant target option restores. - * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use - AARCH64_ISA_V9. - * config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from... - * config/aarch64/aarch64-protos.h (aarch64_key_type): Remove. - (aarch64_ra_sign_key): Remove. ---- - gcc/config/aarch64/aarch64-c.cc | 2 +- - gcc/config/aarch64/aarch64-opts.h | 6 + - gcc/config/aarch64/aarch64-protos.h | 8 -- - gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------ - gcc/config/aarch64/aarch64.h | 3 - - gcc/config/aarch64/aarch64.opt | 12 +- - 6 files changed, 76 insertions(+), 138 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index a4c407724..90d45e45d 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - { - aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); - -- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version); -+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8); - - builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", - flag_short_enums ? 1 : 4); -diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h -index 93572fe83..421648a15 100644 ---- a/gcc/config/aarch64/aarch64-opts.h -+++ b/gcc/config/aarch64/aarch64-opts.h -@@ -98,4 +98,10 @@ enum stack_protector_guard { - SSP_GLOBAL /* global canary */ - }; - -+/* The key type that -msign-return-address should use. 
*/ -+enum aarch64_key_type { -+ AARCH64_KEY_A, -+ AARCH64_KEY_B -+}; -+ - #endif -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 475d174dd..e60ce3c36 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -672,14 +672,6 @@ enum simd_immediate_check { - AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC - }; - --/* The key type that -msign-return-address should use. */ --enum aarch64_key_type { -- AARCH64_KEY_A, -- AARCH64_KEY_B --}; -- --extern enum aarch64_key_type aarch64_ra_sign_key; -- - extern struct tune_params aarch64_tune_params; - - /* The available SVE predicate patterns, known in the ACLE as "svpattern". */ -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ba888beb0..254ecfaa2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, - aarch64_addr_query_type); - static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); - --/* Major revision number of the ARM Architecture implemented by the target. */ --unsigned aarch64_architecture_version; -- - /* The processor for which instructions should be scheduled. 
*/ - enum aarch64_processor aarch64_tune = cortexa53; - -@@ -2931,7 +2928,6 @@ struct processor - enum aarch64_processor ident; - enum aarch64_processor sched_core; - enum aarch64_arch arch; -- unsigned architecture_version; - const uint64_t flags; - const struct tune_params *const tune; - }; -@@ -2940,9 +2936,9 @@ struct processor - static const struct processor all_architectures[] = - { - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ -- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL}, -+ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, - #include "aarch64-arches.def" -- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} -+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; - - /* Processor cores implementing AArch64. */ -@@ -2950,23 +2946,13 @@ static const struct processor all_cores[] = - { - #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ - {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ -- all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \ - FLAGS, &COSTS##_tunings}, - #include "aarch64-cores.def" -- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8, -+ {"generic", generic, cortexa53, AARCH64_ARCH_8A, - AARCH64_FL_FOR_ARCH8, &generic_tunings}, -- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL} -+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; - -- --/* Target specification. These are populated by the -march, -mtune, -mcpu -- handling code or by target attributes. */ --static const struct processor *selected_arch; --static const struct processor *selected_cpu; --static const struct processor *selected_tune; -- --enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; -- - /* The current tuning set. 
*/ - struct tune_params aarch64_tune_params = generic_tunings; - -@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void) - /* Use the specified limit for the number of cases before using jump - tables at higher optimization levels. */ - if (optimize > 2 -- && selected_cpu->tune->max_case_values != 0) -- return selected_cpu->tune->max_case_values; -+ && aarch64_tune_params.max_case_values != 0) -+ return aarch64_tune_params.max_case_values; - else - return optimize_size ? 8 : 11; - } -@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts) - return; - } - -+/* Return the CPU corresponding to the enum CPU. */ -+ -+static const struct processor * -+aarch64_get_tune_cpu (enum aarch64_processor cpu) -+{ -+ gcc_assert (cpu != aarch64_none); -+ -+ return &all_cores[cpu]; -+} -+ -+/* Return the architecture corresponding to the enum ARCH. */ -+ -+static const struct processor * -+aarch64_get_arch (enum aarch64_arch arch) -+{ -+ gcc_assert (arch != aarch64_no_arch); -+ -+ return &all_architectures[arch]; -+} -+ - /* Parse STRING looking for options in the format: - string :: option:string - option :: name=substring -@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) - void - aarch64_override_options_internal (struct gcc_options *opts) - { -- aarch64_tune_flags = selected_tune->flags; -- aarch64_tune = selected_tune->sched_core; -+ const struct processor *tune = aarch64_get_tune_cpu (opts->x_selected_tune); -+ aarch64_tune_flags = tune->flags; -+ aarch64_tune = tune->sched_core; - /* Make a copy of the tuning parameters attached to the core, which - we may later overwrite. 
*/ -- aarch64_tune_params = *(selected_tune->tune); -- aarch64_architecture_version = selected_arch->architecture_version; -- if (selected_tune->tune == &generic_tunings) -+ aarch64_tune_params = *(tune->tune); -+ if (tune->tune == &generic_tunings) - aarch64_adjust_generic_arch_tuning (aarch64_tune_params); - - if (opts->x_aarch64_override_tune_string) - aarch64_parse_override_string (opts->x_aarch64_override_tune_string, -- &aarch64_tune_params); -+ &aarch64_tune_params); - - /* This target defaults to strict volatile bitfields. */ - if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) -@@ -18051,13 +18057,6 @@ aarch64_override_options_internal (struct gcc_options *opts) - && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) - opts->x_flag_prefetch_loop_arrays = 1; - -- if (opts->x_aarch64_arch_string == NULL) -- opts->x_aarch64_arch_string = selected_arch->name; -- if (opts->x_aarch64_cpu_string == NULL) -- opts->x_aarch64_cpu_string = selected_cpu->name; -- if (opts->x_aarch64_tune_string == NULL) -- opts->x_aarch64_tune_string = selected_tune->name; -- - aarch64_override_options_after_change_1 (opts); - } - -@@ -18409,26 +18408,6 @@ aarch64_validate_mtune (const char *str, const struct processor **res) - return false; - } - --/* Return the CPU corresponding to the enum CPU. */ -- --static const struct processor * --aarch64_get_tune_cpu (enum aarch64_processor cpu) --{ -- gcc_assert (cpu != aarch64_none); -- -- return &all_cores[cpu]; --} -- --/* Return the architecture corresponding to the enum ARCH. */ -- --static const struct processor * --aarch64_get_arch (enum aarch64_arch arch) --{ -- gcc_assert (arch != aarch64_no_arch); -- -- return &all_architectures[arch]; --} -- - /* Return the VG value associated with -msve-vector-bits= value VALUE. 
*/ - - static poly_uint16 -@@ -18464,9 +18443,9 @@ aarch64_override_options (void) - uint64_t arch_isa = 0; - aarch64_isa_flags = 0; - -- selected_cpu = NULL; -- selected_arch = NULL; -- selected_tune = NULL; -+ const struct processor *cpu = NULL; -+ const struct processor *arch = NULL; -+ const struct processor *tune = NULL; - - if (aarch64_harden_sls_string) - aarch64_validate_sls_mitigation (aarch64_harden_sls_string); -@@ -18478,56 +18457,52 @@ aarch64_override_options (void) - If either of -march or -mtune is given, they override their - respective component of -mcpu. */ - if (aarch64_cpu_string) -- aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa); -+ aarch64_validate_mcpu (aarch64_cpu_string, &cpu, &cpu_isa); - - if (aarch64_arch_string) -- aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa); -+ aarch64_validate_march (aarch64_arch_string, &arch, &arch_isa); - - if (aarch64_tune_string) -- aarch64_validate_mtune (aarch64_tune_string, &selected_tune); -+ aarch64_validate_mtune (aarch64_tune_string, &tune); - - #ifdef SUBTARGET_OVERRIDE_OPTIONS - SUBTARGET_OVERRIDE_OPTIONS; - #endif - -- if (selected_cpu && selected_arch) -+ if (cpu && arch) - { - /* If both -mcpu and -march are specified, warn if they are not - architecturally compatible and prefer the -march ISA flags. 
*/ -- if (selected_arch->arch != selected_cpu->arch) -+ if (arch->arch != cpu->arch) - { - warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch", - aarch64_cpu_string, - aarch64_arch_string); - } - -+ selected_arch = arch->arch; - aarch64_isa_flags = arch_isa; - } -- else if (selected_cpu) -+ else if (cpu) - { -- selected_arch = &all_architectures[selected_cpu->arch]; -+ selected_arch = cpu->arch; - aarch64_isa_flags = cpu_isa; - } -- else if (selected_arch) -+ else if (arch) - { -- selected_cpu = &all_cores[selected_arch->ident]; -+ cpu = &all_cores[arch->ident]; -+ selected_arch = arch->arch; - aarch64_isa_flags = arch_isa; - } - else - { - /* No -mcpu or -march specified, so use the default CPU. */ -- selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; -- selected_arch = &all_architectures[selected_cpu->arch]; -- aarch64_isa_flags = selected_cpu->flags; -+ cpu = &all_cores[TARGET_CPU_DEFAULT]; -+ selected_arch = cpu->arch; -+ aarch64_isa_flags = cpu->flags; - } - -- explicit_arch = selected_arch->arch; -- if (!selected_tune) -- selected_tune = selected_cpu; -- explicit_tune_core = selected_tune->ident; -- -- gcc_assert (explicit_tune_core != aarch64_none); -- gcc_assert (explicit_arch != aarch64_no_arch); -+ selected_tune = tune ? tune->ident : cpu->ident; - - if (aarch64_enable_bti == 2) - { -@@ -18646,38 +18621,14 @@ initialize_aarch64_code_model (struct gcc_options *opts) - } - } - --/* Implement TARGET_OPTION_SAVE. */ -- --static void --aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts, -- struct gcc_options */* opts_set */) --{ -- ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; -- ptr->x_aarch64_branch_protection_string -- = opts->x_aarch64_branch_protection_string; --} -- - /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions - using the information saved in PTR. 
*/ - - static void - aarch64_option_restore (struct gcc_options *opts, -- struct gcc_options */* opts_set */, -- struct cl_target_option *ptr) -+ struct gcc_options * /* opts_set */, -+ struct cl_target_option * /* ptr */) - { -- opts->x_explicit_arch = ptr->x_explicit_arch; -- selected_arch = aarch64_get_arch (ptr->x_explicit_arch); -- opts->x_explicit_tune_core = ptr->x_explicit_tune_core; -- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); -- opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; -- opts->x_aarch64_branch_protection_string -- = ptr->x_aarch64_branch_protection_string; -- if (opts->x_aarch64_branch_protection_string) -- { -- aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string, -- NULL); -- } -- - aarch64_override_options_internal (opts); - } - -@@ -18687,11 +18638,11 @@ static void - aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) - { - const struct processor *cpu -- = aarch64_get_tune_cpu (ptr->x_explicit_tune_core); -- uint64_t isa_flags = ptr->x_aarch64_isa_flags; -- const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch); -+ = aarch64_get_tune_cpu (ptr->x_selected_tune); -+ const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch); - std::string extension -- = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags); -+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags, -+ arch->flags); - - fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name); - fprintf (file, "%*sselected arch = %s%s\n", indent, "", -@@ -18804,8 +18755,7 @@ aarch64_handle_attr_arch (const char *str) - if (parse_res == AARCH64_PARSE_OK) - { - gcc_assert (tmp_arch); -- selected_arch = tmp_arch; -- explicit_arch = selected_arch->arch; -+ selected_arch = tmp_arch->arch; - return true; - } - -@@ -18843,11 +18793,8 @@ aarch64_handle_attr_cpu (const char *str) - if (parse_res == AARCH64_PARSE_OK) - { - gcc_assert (tmp_cpu); 
-- selected_tune = tmp_cpu; -- explicit_tune_core = selected_tune->ident; -- -- selected_arch = &all_architectures[tmp_cpu->arch]; -- explicit_arch = selected_arch->arch; -+ selected_tune = tmp_cpu->ident; -+ selected_arch = tmp_cpu->arch; - return true; - } - -@@ -18915,8 +18862,7 @@ aarch64_handle_attr_tune (const char *str) - if (parse_res == AARCH64_PARSE_OK) - { - gcc_assert (tmp_tune); -- selected_tune = tmp_tune; -- explicit_tune_core = selected_tune->ident; -+ selected_tune = tmp_tune->ident; - return true; - } - -@@ -22821,7 +22767,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, - gcc_assert (targ_options); - - const struct processor *this_arch -- = aarch64_get_arch (targ_options->x_explicit_arch); -+ = aarch64_get_arch (targ_options->x_selected_arch); - - uint64_t isa_flags = targ_options->x_aarch64_isa_flags; - std::string extension -@@ -22840,7 +22786,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, - useful to readers of the generated asm. Do it only when it changes - from function to function and verbose assembly is requested. 
*/ - const struct processor *this_tune -- = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core); -+ = aarch64_get_tune_cpu (targ_options->x_selected_tune); - - if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name) - { -@@ -22952,7 +22898,7 @@ aarch64_start_file (void) - = TREE_TARGET_OPTION (target_option_default_node); - - const struct processor *default_arch -- = aarch64_get_arch (default_options->x_explicit_arch); -+ = aarch64_get_arch (default_options->x_selected_arch); - uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (default_isa_flags, -@@ -27950,9 +27896,6 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_OFFLOAD_OPTIONS - #define TARGET_OFFLOAD_OPTIONS aarch64_offload_options - --#undef TARGET_OPTION_SAVE --#define TARGET_OPTION_SAVE aarch64_option_save -- - #undef TARGET_OPTION_RESTORE - #define TARGET_OPTION_RESTORE aarch64_option_restore - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 14e2af054..7d73689e4 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -144,9 +144,6 @@ - - #define PCC_BITFIELD_TYPE_MATTERS 1 - --/* Major revision number of the ARM Architecture implemented by the target. */ --extern unsigned aarch64_architecture_version; -- - /* Instruction tuning/selection flags. */ - - /* Bit values used to identify processor capabilities. 
*/ -diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt -index 101664c7c..836a3c784 100644 ---- a/gcc/config/aarch64/aarch64.opt -+++ b/gcc/config/aarch64/aarch64.opt -@@ -22,13 +22,10 @@ HeaderInclude - config/aarch64/aarch64-opts.h - - TargetVariable --enum aarch64_processor explicit_tune_core = aarch64_none -+enum aarch64_processor selected_tune = aarch64_none - - TargetVariable --enum aarch64_arch explicit_arch = aarch64_no_arch -- --TargetSave --const char *x_aarch64_override_tune_string -+enum aarch64_arch selected_arch = aarch64_no_arch - - TargetVariable - uint64_t aarch64_isa_flags = 0 -@@ -36,6 +33,9 @@ uint64_t aarch64_isa_flags = 0 - TargetVariable - unsigned aarch64_enable_bti = 2 - -+TargetVariable -+enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A -+ - ; The TLS dialect names to use with -mtls-dialect. - - Enum -@@ -139,7 +139,7 @@ Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI - Generate code that conforms to the specified ABI. - - moverride= --Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) -+Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) Save - -moverride= Power users only! Override CPU optimization parameters. 
- - Enum --- -2.33.0 - diff --git a/0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch b/0105-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch similarity index 80% rename from 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch rename to 0105-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch index 3af28ed..f0b4a0a 100644 --- a/0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch +++ b/0105-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch @@ -1,10 +1,7 @@ -From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001 +From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:05 +0100 -Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames - with no saved registers - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5 +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: [PATCH] aarch64: Explicitly handle frames with no saved registers If a frame has no saved registers, it can be allocated in one go. 
There is no need to treat the areas below and above the saved @@ -28,10 +25,10 @@ gcc/ 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index fbd7a079a..c59af6b1c 100644 +index 79253322fd7c..e1f21230c15e 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void) +@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void) HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; HOST_WIDE_INT const_saved_regs_size; @@ -47,5 +44,5 @@ index fbd7a079a..c59af6b1c 100644 /* Simple, small frame with no outgoing arguments: -- -2.33.0 +2.43.5 diff --git a/0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch b/0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch deleted file mode 100644 index 1655fab..0000000 --- a/0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch +++ /dev/null @@ -1,108 +0,0 @@ -From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001 -From: Kyrylo Tkachov -Date: Mon, 26 Sep 2022 10:10:25 +0100 -Subject: [PATCH 007/157] [Backport][SME] aarch64: Add -march support for - Armv9.1-A, Armv9.2-A, Armv9.3-A - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3 - -This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject -through -march. These are already supported in binutils. - -Bootstrapped and tested on aarch64-none-linux-gnu. - -gcc/ChangeLog: - - * config/aarch64/aarch64-arches.def (armv9.1-a): Define. - (armv9.2-a): Likewise. - (armv9.3-a): Likewise. - * config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise. - (AARCH64_FL_V9_2): Likewise. - (AARCH64_FL_V9_3): Likewise. - (AARCH64_FL_FOR_ARCH9_1): Likewise. - (AARCH64_FL_FOR_ARCH9_2): Likewise. - (AARCH64_FL_FOR_ARCH9_3): Likewise. - (AARCH64_ISA_V9_1): Likewise. - (AARCH64_ISA_V9_2): Likewise. 
- (AARCH64_ISA_V9_3): Likewise. - * doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a, - armv9.3-a values to -march. ---- - gcc/config/aarch64/aarch64-arches.def | 3 +++ - gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++ - gcc/doc/invoke.texi | 3 +++ - 3 files changed, 24 insertions(+) - -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index 3c2b16588..6150448dc 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8 - AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) - AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) - AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) -+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) -+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) -+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) - - #undef AARCH64_ARCH -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 7d73689e4..42aae37ef 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -239,6 +239,15 @@ - /* Armv8.8-a architecture extensions. */ - #define AARCH64_FL_V8_8 (1ULL << 45) - -+/* Armv9.1-A. */ -+#define AARCH64_FL_V9_1 (1ULL << 46) -+ -+/* Armv9.2-A. */ -+#define AARCH64_FL_V9_2 (1ULL << 47) -+ -+/* Armv9.3-A. */ -+#define AARCH64_FL_V9_3 (1ULL << 48) -+ - /* Has FP and SIMD. 
*/ - #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) - -@@ -274,6 +283,12 @@ - #define AARCH64_FL_FOR_ARCH9 \ - (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ - | AARCH64_FL_F16) -+#define AARCH64_FL_FOR_ARCH9_1 \ -+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) -+#define AARCH64_FL_FOR_ARCH9_2 \ -+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) -+#define AARCH64_FL_FOR_ARCH9_3 \ -+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) - - /* Macros to test ISA flags. */ - -@@ -314,6 +329,9 @@ - #define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) - #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) - #define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) -+#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) -+#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) -+#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) - #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) - #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) - -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 17d9e4126..53709b246 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -19176,6 +19176,9 @@ and the features that they enable by default: - @item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64} - @item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops} - @item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2} -+@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm} -+@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64} -+@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops} - @item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r} - @end multitable - --- -2.33.0 - diff --git a/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch 
b/0106-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch similarity index 90% rename from 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch rename to 0106-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch index f754f97..493cef3 100644 --- a/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch +++ b/0106-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch @@ -1,10 +1,7 @@ -From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001 +From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:05 +0100 -Subject: [PATCH 086/157] [Backport][SME] aarch64: Add bytes_below_saved_regs - to frame info - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976 +Date: Tue, 12 Sep 2023 16:08:49 +0100 +Subject: [PATCH] aarch64: Add bytes_below_saved_regs to frame info The frame layout code currently hard-codes the assumption that the number of bytes below the saved registers is equal to the @@ -25,10 +22,10 @@ gcc/ 2 files changed, 41 insertions(+), 35 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index c59af6b1c..5533dd85b 100644 +index e1f21230c15e..94e1b6865849 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void) +@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void) gcc_assert (crtl->is_leaf || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); @@ -37,7 +34,7 @@ index c59af6b1c..5533dd85b 100644 /* Now assign stack slots for the registers. Start with the predicate registers, since predicate LDR and STR have a relatively small offset range. These saves happen below the hard frame pointer. 
*/ -@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void) +@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void) poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; @@ -60,7 +57,7 @@ index c59af6b1c..5533dd85b 100644 frame.locals_offset = frame.saved_varargs_size; -@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void) +@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void) else if (frame.wb_pop_candidate1 != INVALID_REGNUM) max_push_offset = 256; @@ -69,7 +66,7 @@ index c59af6b1c..5533dd85b 100644 HOST_WIDE_INT const_saved_regs_size; if (known_eq (frame.saved_regs_size, 0)) frame.initial_adjust = frame.frame_size; -@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void) +@@ -8384,31 +8386,31 @@ aarch64_layout_frame (void) && const_size < max_push_offset && known_eq (frame.hard_fp_offset, const_size)) { @@ -113,7 +110,7 @@ index c59af6b1c..5533dd85b 100644 } else if (saves_below_hard_fp_p && known_eq (frame.saved_regs_size, -@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void) +@@ -8418,30 +8420,29 @@ aarch64_layout_frame (void) sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size save SVE registers relative to SP @@ -151,7 +148,7 @@ index c59af6b1c..5533dd85b 100644 sub sp, sp, hard_fp_offset stp x29, x30, [sp, 0] -@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void) +@@ -8449,10 +8450,10 @@ aarch64_layout_frame (void) stp reg3, reg4, [sp, 16] [sub sp, sp, below_hard_fp_saved_regs_size] [save SVE registers relative to SP] @@ -164,7 +161,7 @@ index c59af6b1c..5533dd85b 100644 } /* Make sure the individual adjustments add up to the full frame size. */ -@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void) +@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void) if (frame_pointer_needed) offset -= frame.below_hard_fp_saved_regs_size; else @@ -173,7 +170,7 @@ index c59af6b1c..5533dd85b 100644 /* Check that we can access the stack slot of the register with one direct load with no adjustments needed. 
*/ -@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) if (frame_pointer_needed) offset -= frame.below_hard_fp_saved_regs_size; else @@ -182,7 +179,7 @@ index c59af6b1c..5533dd85b 100644 rtx addr = plus_constant (Pmode, ptr_reg, offset); rtx mem = gen_frame_mem (mode, addr); -@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) if (frame_pointer_needed) offset2 -= frame.below_hard_fp_saved_regs_size; else @@ -191,7 +188,7 @@ index c59af6b1c..5533dd85b 100644 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); rtx mem2 = gen_frame_mem (mode, addr2); rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) +@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void) registers. If POLY_SIZE is not large enough to require a probe this function will only adjust the stack. When allocating the stack space FRAME_RELATED_P is then used to indicate if the allocation is frame related. @@ -206,7 +203,7 @@ index c59af6b1c..5533dd85b 100644 We emit barriers after each stack adjustment to prevent optimizations from breaking the invariant that we never drop the stack more than a page. This -@@ -10132,7 +10133,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9532,7 +9533,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, /* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to be probed. This maintains the requirement that each page is probed at least once. For initial probing we probe only if the allocation is @@ -216,10 +213,10 @@ index c59af6b1c..5533dd85b 100644 GUARD_SIZE. 
This works that for any allocation that is large enough to trigger a probe here, we'll have at least one, and if they're not large diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 8f0ac2cde..9e0ca380e 100644 +index 6834c3e99226..1e105e12db8d 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -801,6 +801,11 @@ struct GTY (()) aarch64_frame +@@ -871,6 +871,11 @@ struct GTY (()) aarch64_frame /* The size of the callee-save registers with a slot in REG_OFFSET. */ poly_int64 saved_regs_size; @@ -232,5 +229,5 @@ index 8f0ac2cde..9e0ca380e 100644 are saved below the hard frame pointer. */ poly_int64 below_hard_fp_saved_regs_size; -- -2.33.0 +2.43.5 diff --git a/0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch b/0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch deleted file mode 100644 index 4de737c..0000000 --- a/0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch +++ /dev/null @@ -1,112 +0,0 @@ -From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Mon, 19 Feb 2024 19:27:29 +0800 -Subject: [PATCH 008/157] [Backport][SME] Revert "aarch64: Define - __ARM_FEATURE_RCPC" - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d - -Revert this commit to solve conflicts with later patches, -and will apply it later. 
---- - gcc/config/aarch64/aarch64-c.cc | 1 - - gcc/config/aarch64/aarch64-cores.def | 10 +++++----- - gcc/config/aarch64/aarch64.h | 4 +--- - .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 ------------------- - 4 files changed, 6 insertions(+), 29 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 90d45e45d..3d2fb5ec2 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); - aarch64_def_or_undef (TARGET_LS64, - "__ARM_FEATURE_LS64", pfile); -- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); - - /* Not for ACLE, but required to keep "float.h" correct if we switch - target between implementations that do or do not support ARMv8.2-A -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 70b11eb80..842d64932 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_ - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). */ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. 
*/ - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 42aae37ef..7c090c8f2 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -262,8 +262,7 @@ - #define AARCH64_FL_FOR_ARCH8_2 \ - (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) - #define AARCH64_FL_FOR_ARCH8_3 \ -- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \ -- | AARCH64_FL_RCPC) -+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) - #define AARCH64_FL_FOR_ARCH8_4 \ - (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ - | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) -@@ -314,7 +313,6 @@ - #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) --#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) - #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) - #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) - #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) -diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -index 307fa3d67..bfb044f5d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -@@ -248,26 +248,6 @@ - #error "__ARM_FEATURE_CRC32 is not defined but should be!" - #endif - --#pragma GCC target ("arch=armv8.2-a") --#ifdef __ARM_FEATURE_RCPC --#error "__ARM_FEATURE_RCPC is defined but should not be!" --#endif -- --#pragma GCC target ("arch=armv8.2-a+rcpc") --#ifndef __ARM_FEATURE_RCPC --#error "__ARM_FEATURE_RCPC is not defined but should be!" --#endif -- --#pragma GCC target ("+norcpc") --#ifdef __ARM_FEATURE_RCPC --#error "__ARM_FEATURE_RCPC is defined but should not be!" 
--#endif -- --#pragma GCC target ("arch=armv8.3-a") --#ifndef __ARM_FEATURE_RCPC --#error "__ARM_FEATURE_RCPC is not defined but should be!" --#endif -- - int - foo (int a) - { --- -2.33.0 - diff --git a/0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch b/0107-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch similarity index 83% rename from 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch rename to 0107-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch index 5eed26e..76c259f 100644 --- a/0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch +++ b/0107-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch @@ -1,10 +1,7 @@ -From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001 +From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:06 +0100 -Subject: [PATCH 087/157] [Backport][SME] aarch64: Add bytes_below_hard_fp to - frame info - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8 +Date: Tue, 12 Sep 2023 16:08:50 +0100 +Subject: [PATCH] aarch64: Add bytes_below_hard_fp to frame info Following on from the previous bytes_below_saved_regs patch, this one records the number of bytes that are below the hard frame pointer. @@ -36,10 +33,10 @@ gcc/ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 5533dd85b..2bb49b9b0 100644 +index 94e1b6865849..c7d84245fbfc 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void) +@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void) of the callee save area. */ bool saves_below_hard_fp_p = maybe_ne (offset, 0); frame.below_hard_fp_saved_regs_size = offset; @@ -47,7 +44,7 @@ index 5533dd85b..2bb49b9b0 100644 if (frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. 
*/ -@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall) poly_int64 final_adjust = frame.final_adjust; poly_int64 callee_offset = frame.callee_offset; poly_int64 sve_callee_adjust = frame.sve_callee_adjust; @@ -57,7 +54,7 @@ index 5533dd85b..2bb49b9b0 100644 unsigned reg1 = frame.wb_pop_candidate1; unsigned reg2 = frame.wb_pop_candidate2; unsigned int last_gpr = (frame.is_scs_enabled -@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall) is restored on the instruction doing the writeback. */ aarch64_add_offset (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx, @@ -67,10 +64,10 @@ index 5533dd85b..2bb49b9b0 100644 else /* The case where we need to re-use the register here is very rare, so diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 9e0ca380e..dedc5b32f 100644 +index 1e105e12db8d..de68ff7202fc 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame +@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame are saved below the hard frame pointer. */ poly_int64 below_hard_fp_saved_regs_size; @@ -83,5 +80,5 @@ index 9e0ca380e..dedc5b32f 100644 top of the locals area. This value is always a multiple of STACK_BOUNDARY. 
*/ -- -2.33.0 +2.43.5 diff --git a/0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch b/0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch deleted file mode 100644 index a70376d..0000000 --- a/0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Tue, 20 Feb 2024 10:13:06 +0800 -Subject: [PATCH 009/157] [Backport][SME] Revert "Ampere-1 and Ampere-1A core - definition in aarch64-cores.def" - -Revert it to solve conflicts with later patches, and will apply it -later. It's introduced by commit 3668a59ae22a and e9f0d974600e. ---- - gcc/config/aarch64/aarch64-cores.def | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 842d64932..0402bfb74 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH - AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. */ --AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1) --AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. 
*/ - AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) -@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor - /* Armv9.0-A Architecture Processors. */ - - /* Arm ('A') cores. */ --AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG -- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) -+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) - - AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) - --- -2.33.0 - diff --git a/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch b/0108-aarch64-Tweak-aarch64-save-restore-callee-saves.patch similarity index 86% rename from 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch rename to 0108-aarch64-Tweak-aarch64-save-restore-callee-saves.patch index c302467..e1889d9 100644 --- a/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch +++ b/0108-aarch64-Tweak-aarch64-save-restore-callee-saves.patch @@ -1,10 +1,7 @@ -From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001 +From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:06 +0100 -Subject: [PATCH 089/157] [Backport][SME] aarch64: Tweak - aarch64_save/restore_callee_saves - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db +Date: Tue, 12 Sep 2023 16:08:50 +0100 +Subject: [PATCH] aarch64: Tweak aarch64_save/restore_callee_saves aarch64_save_callee_saves and aarch64_restore_callee_saves took a parameter called start_offset that gives the offset of 
the @@ -32,10 +29,10 @@ gcc/ 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 4d505c6fc..a0a4c7ac3 100644 +index c7d84245fbfc..e79551af41df 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void) +@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void) frame.final_adjust = 0; frame.callee_adjust = 0; frame.sve_callee_adjust = 0; @@ -43,7 +40,7 @@ index 4d505c6fc..a0a4c7ac3 100644 frame.wb_pop_candidate1 = frame.wb_push_candidate1; frame.wb_pop_candidate2 = frame.wb_push_candidate2; -@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void) +@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void) stp reg1, reg2, [sp, bytes_below_saved_regs] stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */ frame.initial_adjust = frame.frame_size; @@ -51,7 +48,7 @@ index 4d505c6fc..a0a4c7ac3 100644 } else if (saves_below_hard_fp_p && known_eq (frame.saved_regs_size, -@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, +@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, } /* Emit code to save the callee-saved registers from register number START @@ -69,7 +66,7 @@ index 4d505c6fc..a0a4c7ac3 100644 unsigned start, unsigned limit, bool skip_wb, bool hard_fp_valid_p) { -@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset, machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); @@ -80,7 +77,7 @@ index 4d505c6fc..a0a4c7ac3 100644 rtx base_rtx = stack_pointer_rtx; poly_int64 sp_offset = offset; -@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset, else if (GP_REGNUM_P (regno) && (!offset.is_constant (&const_offset) || const_offset >= 512)) { @@ -91,7 +88,7 @@ index 4d505c6fc..a0a4c7ac3 
100644 if (hard_fp_valid_p) base_rtx = hard_frame_pointer_rtx; else -@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, +@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset, } /* Emit code to restore the callee registers from register number START @@ -109,7 +106,7 @@ index 4d505c6fc..a0a4c7ac3 100644 unsigned limit, bool skip_wb, rtx *cfi_ops) { aarch64_frame &frame = cfun->machine->frame; -@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, +@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); @@ -120,7 +117,7 @@ index 4d505c6fc..a0a4c7ac3 100644 rtx base_rtx = stack_pointer_rtx; if (mode == VNx2DImode && BYTES_BIG_ENDIAN) aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void) +@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void) HOST_WIDE_INT callee_adjust = frame.callee_adjust; poly_int64 final_adjust = frame.final_adjust; poly_int64 sve_callee_adjust = frame.sve_callee_adjust; @@ -129,7 +126,7 @@ index 4d505c6fc..a0a4c7ac3 100644 unsigned reg1 = frame.wb_push_candidate1; unsigned reg2 = frame.wb_push_candidate2; bool emit_frame_chain = frame.emit_frame_chain; -@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void) +@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void) - frame.hard_fp_offset); gcc_assert (known_ge (chain_offset, 0)); @@ -140,7 +137,7 @@ index 4d505c6fc..a0a4c7ac3 100644 if (emit_frame_chain) { -@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void) +@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void) { reg1 = R29_REGNUM; reg2 = R30_REGNUM; @@ -149,8 +146,8 @@ index 4d505c6fc..a0a4c7ac3 100644 false, false); } else -@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void) - aarch64_emit_stack_tie (hard_frame_pointer_rtx); +@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void) 
+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } - aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, @@ -158,7 +155,7 @@ index 4d505c6fc..a0a4c7ac3 100644 callee_adjust != 0 || emit_frame_chain, emit_frame_chain); if (maybe_ne (sve_callee_adjust, 0)) -@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void) +@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void) aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, sve_callee_adjust, !frame_pointer_needed, false); @@ -179,7 +176,7 @@ index 4d505c6fc..a0a4c7ac3 100644 aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, !frame_pointer_needed, true); } -@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall) poly_int64 initial_adjust = frame.initial_adjust; HOST_WIDE_INT callee_adjust = frame.callee_adjust; poly_int64 final_adjust = frame.final_adjust; @@ -187,7 +184,7 @@ index 4d505c6fc..a0a4c7ac3 100644 poly_int64 sve_callee_adjust = frame.sve_callee_adjust; poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; unsigned reg1 = frame.wb_pop_candidate1; -@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall) /* Restore the vector registers before the predicate registers, so that we can use P4 as a temporary for big-endian SVE frames. */ @@ -199,7 +196,7 @@ index 4d505c6fc..a0a4c7ac3 100644 false, &cfi_ops); if (maybe_ne (sve_callee_adjust, 0)) aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -@@ -10545,7 +10545,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) +@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall) /* When shadow call stack is enabled, the scs_pop in the epilogue will restore x30, we don't need to restore x30 again in the traditional way. 
*/ @@ -209,10 +206,10 @@ index 4d505c6fc..a0a4c7ac3 100644 callee_adjust != 0, &cfi_ops); diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index dedc5b32f..a1db4f689 100644 +index de68ff7202fc..94fca4b94716 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -837,10 +837,6 @@ struct GTY (()) aarch64_frame +@@ -907,10 +907,6 @@ struct GTY (()) aarch64_frame It is zero when no push is used. */ HOST_WIDE_INT callee_adjust; @@ -224,5 +221,5 @@ index dedc5b32f..a1db4f689 100644 SVE registers. */ poly_int64 sve_callee_adjust; -- -2.33.0 +2.43.5 diff --git a/0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch b/0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch deleted file mode 100644 index 9b541de..0000000 --- a/0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:50 +0100 -Subject: [PATCH 010/157] [Backport][SME] aarch64: Rename AARCH64_ISA - architecture-level macros - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9 - -All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R -are for the A profile: they cause __ARM_ARCH_PROFILE to be set to -'A' and they are associated with architecture names like armv8.4-a. - -It's convenient for later patches if we make this explicit -by adding an "A" to the name. Also, rather than add an underscore -(as for V8_R) it's more convenient to add the profile directly -to the number, like we already do in the ARCH_IDENT field of the -aarch64-arches.def entries. - -gcc/ - * config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3) - (AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6) - (AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2) - (AARCH64_ISA_V9_3): Add "A" to the end of the name. 
- (AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R. - (TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update - accordingly. - * common/config/aarch64/aarch64-common.cc - (aarch64_get_extension_string_for_isa_flags): Likewise. - * config/aarch64/aarch64-c.cc - (aarch64_define_unconditional_macros): Likewise. ---- - gcc/common/config/aarch64/aarch64-common.cc | 2 +- - gcc/config/aarch64/aarch64-c.cc | 4 +-- - gcc/config/aarch64/aarch64.h | 28 ++++++++++----------- - 3 files changed, 17 insertions(+), 17 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 85ce8133b..3dc020f0c 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, - - Note that assemblers with Armv8-R AArch64 support should not have this - issue, so we don't need this fix when targeting Armv8-R. */ -- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R) -+ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) - isa_flag_bits |= AARCH64_ISA_CRC; - - /* Pass Two: -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 3d2fb5ec2..18c9b975b 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - builtin_define ("__ARM_ARCH_8A"); - - builtin_define_with_int_value ("__ARM_ARCH_PROFILE", -- AARCH64_ISA_V8_R ? 'R' : 'A'); -+ AARCH64_ISA_V8R ? 'R' : 'A'); - builtin_define ("__ARM_FEATURE_CLZ"); - builtin_define ("__ARM_FEATURE_IDIV"); - builtin_define ("__ARM_FEATURE_UNALIGNED"); -@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - { - aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); - -- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 
9 : 8); -+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8); - - builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", - flag_short_enums ? 1 : 4); -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 7c090c8f2..356a263b2 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -297,7 +297,7 @@ - #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) - #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) - #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) --#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) -+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) - #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) - #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) - #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) -@@ -305,31 +305,31 @@ - #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) - #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) - #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) --#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) -+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) - #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) - #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) - #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) --#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4) -+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) - #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) - #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) - #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) --#define AARCH64_ISA_V8_5 (aarch64_isa_flags & 
AARCH64_FL_V8_5) -+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) - #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) - #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) --#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) -+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) - #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) - #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) - #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) - #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) - #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) --#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R) -+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) - #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) --#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9) --#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1) --#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2) --#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3) -+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) -+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) -+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) -+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) - #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) - #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) - -@@ -383,16 +383,16 @@ - #define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) - - /* ARMv8.3-A features. */ --#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) -+#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) - - /* Javascript conversion instruction from Armv8.3-a. */ --#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) -+#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A) - - /* Armv8.3-a Complex number extension to AdvSIMD extensions. 
*/ - #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) - - /* Floating-point rounding instructions from Armv8.5-a. */ --#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) -+#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT) - - /* TME instructions are enabled. */ - #define TARGET_TME (AARCH64_ISA_TME) -@@ -401,7 +401,7 @@ - #define TARGET_RNG (AARCH64_ISA_RNG) - - /* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */ --#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG) -+#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG) - - /* I8MM instructions are enabled through +i8mm. */ - #define TARGET_I8MM (AARCH64_ISA_I8MM) --- -2.33.0 - diff --git a/0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch b/0109-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch similarity index 77% rename from 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch rename to 0109-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch index 267b3ab..80bbadf 100644 --- a/0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch +++ b/0109-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch @@ -1,10 +1,7 @@ -From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001 +From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:07 +0100 -Subject: [PATCH 090/157] [Backport][SME] aarch64: Only calculate chain_offset - if there is a chain - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645 +Date: Tue, 12 Sep 2023 16:08:51 +0100 +Subject: [PATCH] aarch64: Only calculate chain_offset if there is a chain After previous patches, it is no longer necessary to calculate a chain_offset in cases where there is no chain record. 
@@ -17,10 +14,10 @@ gcc/ 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index a0a4c7ac3..bef6a658b 100644 +index e79551af41df..d71a042d6112 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void) +@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void) if (callee_adjust != 0) aarch64_push_regs (reg1, reg2, callee_adjust); @@ -43,5 +40,5 @@ index a0a4c7ac3..bef6a658b 100644 { reg1 = R29_REGNUM; -- -2.33.0 +2.43.5 diff --git a/0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch b/0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch deleted file mode 100644 index 99317e4..0000000 --- a/0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch +++ /dev/null @@ -1,220 +0,0 @@ -From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:51 +0100 -Subject: [PATCH 011/157] [Backport][SME] aarch64: Rename AARCH64_FL - architecture-level macros - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301 - -Following on from the previous AARCH64_ISA patch, this one adds the -profile name directly to the end of architecture-level AARCH64_FL_* -macros. - -gcc/ - * config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2) - (AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6) - (AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1) - (AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name. - (AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R. - (AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly. - (AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise. - (AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise. - (AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise. - (AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise. 
- (AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise. - (AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A) - (AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise. - (AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise. - (AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise. ---- - gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------ - 1 file changed, 36 insertions(+), 36 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 356a263b2..5a91dfdd2 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -154,22 +154,22 @@ - /* ARMv8.1-A architecture extensions. */ - #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ - #define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */ --#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */ -+#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ - /* Armv8-R. */ --#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */ -+#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ - /* ARMv8.2-A architecture extensions. */ --#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ -+#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ - #define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ - #define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ - /* ARMv8.3-A architecture extensions. */ --#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */ -+#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ - #define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ - #define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ - /* New flags to split crypto into aes and sha2. */ - #define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ - #define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ - /* ARMv8.4-A architecture extensions. 
*/ --#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */ -+#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ - #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ - #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ - #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ -@@ -179,7 +179,7 @@ - #define AARCH64_FL_PROFILE (1 << 21) - - /* ARMv8.5-A architecture extensions. */ --#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */ -+#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ - #define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ - #define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging - Extensions. */ -@@ -204,7 +204,7 @@ - #define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ - - /* Armv8.6-A architecture extensions. */ --#define AARCH64_FL_V8_6 (1ULL << 34) -+#define AARCH64_FL_V8_6A (1ULL << 34) - - /* 8-bit Integer Matrix Multiply (I8MM) extensions. */ - #define AARCH64_FL_I8MM (1ULL << 35) -@@ -225,28 +225,28 @@ - #define AARCH64_FL_PAUTH (1ULL << 40) - - /* Armv9.0-A. */ --#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */ -+#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ - - /* 64-byte atomic load/store extensions. */ - #define AARCH64_FL_LS64 (1ULL << 42) - - /* Armv8.7-a architecture extensions. */ --#define AARCH64_FL_V8_7 (1ULL << 43) -+#define AARCH64_FL_V8_7A (1ULL << 43) - - /* Hardware memory operation instructions. */ - #define AARCH64_FL_MOPS (1ULL << 44) - - /* Armv8.8-a architecture extensions. */ --#define AARCH64_FL_V8_8 (1ULL << 45) -+#define AARCH64_FL_V8_8A (1ULL << 45) - - /* Armv9.1-A. */ --#define AARCH64_FL_V9_1 (1ULL << 46) -+#define AARCH64_FL_V9_1A (1ULL << 46) - - /* Armv9.2-A. */ --#define AARCH64_FL_V9_2 (1ULL << 47) -+#define AARCH64_FL_V9_2A (1ULL << 47) - - /* Armv9.3-A. 
*/ --#define AARCH64_FL_V9_3 (1ULL << 48) -+#define AARCH64_FL_V9_3A (1ULL << 48) - - /* Has FP and SIMD. */ - #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) -@@ -258,36 +258,36 @@ - #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) - #define AARCH64_FL_FOR_ARCH8_1 \ - (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ -- | AARCH64_FL_RDMA | AARCH64_FL_V8_1) -+ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) - #define AARCH64_FL_FOR_ARCH8_2 \ -- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2) -+ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) - #define AARCH64_FL_FOR_ARCH8_3 \ -- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH) -+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) - #define AARCH64_FL_FOR_ARCH8_4 \ -- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \ -+ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ - | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) - #define AARCH64_FL_FOR_ARCH8_5 \ -- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ -+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ - | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) - #define AARCH64_FL_FOR_ARCH8_6 \ -- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ -+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ - | AARCH64_FL_I8MM | AARCH64_FL_BF16) - #define AARCH64_FL_FOR_ARCH8_7 \ -- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64) -+ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) - #define AARCH64_FL_FOR_ARCH8_8 \ -- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS) -+ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) - - #define AARCH64_FL_FOR_ARCH8_R \ -- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R) -+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) - #define AARCH64_FL_FOR_ARCH9 \ -- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \ -+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ - | 
AARCH64_FL_F16) - #define AARCH64_FL_FOR_ARCH9_1 \ -- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1) -+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) - #define AARCH64_FL_FOR_ARCH9_2 \ -- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2) -+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) - #define AARCH64_FL_FOR_ARCH9_3 \ -- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3) -+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) - - /* Macros to test ISA flags. */ - -@@ -297,7 +297,7 @@ - #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD) - #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE) - #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA) --#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2) -+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A) - #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) - #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) - #define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) -@@ -305,31 +305,31 @@ - #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) - #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) - #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) --#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3) -+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) - #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) - #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) - #define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2) --#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4) -+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A) - #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML 
(aarch64_isa_flags & AARCH64_FL_F16FML) - #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) - #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) --#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5) -+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) - #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) - #define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG) --#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6) -+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A) - #define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) - #define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) - #define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) - #define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) - #define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB) --#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R) -+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R) - #define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH) --#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9) --#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1) --#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2) --#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3) -+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A) -+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A) -+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2A) -+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A) - #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) - #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) - --- -2.33.0 - diff --git a/0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch b/0110-aarch64-Rename-locals-offset-to-bytes-above-locals.patch similarity index 80% rename from 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch 
rename to 0110-aarch64-Rename-locals-offset-to-bytes-above-locals.patch index 8b44c65..92a4625 100644 --- a/0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch +++ b/0110-aarch64-Rename-locals-offset-to-bytes-above-locals.patch @@ -1,25 +1,22 @@ -From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001 +From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:07 +0100 -Subject: [PATCH 091/157] [Backport][SME] aarch64: Rename locals_offset to - bytes_above_locals +Date: Tue, 12 Sep 2023 16:08:51 +0100 +Subject: [PATCH] aarch64: Rename locals_offset to bytes_above_locals MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 +Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854 - locals_offset was described as: /* Offset from the base of the frame (incomming SP) to the top of the locals area. This value is always a multiple of STACK_BOUNDARY. */ -This is implicitly an “upside down” view of the frame: the incoming +This is implicitly an “upside down” view of the frame: the incoming SP is at offset 0, and anything N bytes below the incoming SP is at offset N (rather than -N). -However, reg_offset instead uses a “right way up” view; that is, +However, reg_offset instead uses a “right way up” view; that is, it views offsets in address terms. Something above X is at a positive offset from X and something below X is at a negative offset from X. 
@@ -43,10 +40,10 @@ gcc/ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index bef6a658b..992f71bbd 100644 +index d71a042d6112..d4ec352ba98a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void) +@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void) STACK_BOUNDARY / BITS_PER_UNIT)); frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs; @@ -55,7 +52,7 @@ index bef6a658b..992f71bbd 100644 frame.initial_adjust = 0; frame.final_adjust = 0; -@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) +@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) return frame.hard_fp_offset; if (from == FRAME_POINTER_REGNUM) @@ -72,10 +69,10 @@ index bef6a658b..992f71bbd 100644 return frame.frame_size; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index a1db4f689..2acff9a96 100644 +index 94fca4b94716..bf46e6124aa9 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame +@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame always a multiple of STACK_BOUNDARY. */ poly_int64 bytes_below_hard_fp; @@ -90,5 +87,5 @@ index a1db4f689..2acff9a96 100644 /* Offset from the base of the frame (incomming SP) to the hard_frame_pointer. 
This value is always a multiple of -- -2.33.0 +2.43.5 diff --git a/0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch b/0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch deleted file mode 100644 index 77737f5..0000000 --- a/0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch +++ /dev/null @@ -1,398 +0,0 @@ -From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:51 +0100 -Subject: [PATCH 012/157] [Backport][SME] aarch64: Rename AARCH64_FL_FOR_ARCH - macros - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2 - -This patch renames AARCH64_FL_FOR_ARCH* macros to follow the -same V names that we (now) use elsewhere. - -The names are only temporary -- a later patch will move the -information to the .def file instead. However, it helps with -the sequencing to do this first. - -gcc/ - * config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to... - (AARCH64_FL_FOR_V8A): ...this. - (AARCH64_FL_FOR_ARCH8_1): Rename to... - (AARCH64_FL_FOR_V8_1A): ...this. - (AARCH64_FL_FOR_ARCH8_2): Rename to... - (AARCH64_FL_FOR_V8_2A): ...this. - (AARCH64_FL_FOR_ARCH8_3): Rename to... - (AARCH64_FL_FOR_V8_3A): ...this. - (AARCH64_FL_FOR_ARCH8_4): Rename to... - (AARCH64_FL_FOR_V8_4A): ...this. - (AARCH64_FL_FOR_ARCH8_5): Rename to... - (AARCH64_FL_FOR_V8_5A): ...this. - (AARCH64_FL_FOR_ARCH8_6): Rename to... - (AARCH64_FL_FOR_V8_6A): ...this. - (AARCH64_FL_FOR_ARCH8_7): Rename to... - (AARCH64_FL_FOR_V8_7A): ...this. - (AARCH64_FL_FOR_ARCH8_8): Rename to... - (AARCH64_FL_FOR_V8_8A): ...this. - (AARCH64_FL_FOR_ARCH8_R): Rename to... - (AARCH64_FL_FOR_V8R): ...this. - (AARCH64_FL_FOR_ARCH9): Rename to... - (AARCH64_FL_FOR_V9A): ...this. - (AARCH64_FL_FOR_ARCH9_1): Rename to... - (AARCH64_FL_FOR_V9_1A): ...this. - (AARCH64_FL_FOR_ARCH9_2): Rename to... - (AARCH64_FL_FOR_V9_2A): ...this. - (AARCH64_FL_FOR_ARCH9_3): Rename to... 
- (AARCH64_FL_FOR_V9_3A): ...this. - * common/config/aarch64/aarch64-common.cc (all_cores): Update - accordingly. - * config/aarch64/aarch64-arches.def: Likewise. - * config/aarch64/aarch64-cores.def: Likewise. - * config/aarch64/aarch64.cc (all_cores): Likewise. ---- - gcc/common/config/aarch64/aarch64-common.cc | 2 +- - gcc/config/aarch64/aarch64-arches.def | 28 ++--- - gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- - gcc/config/aarch64/aarch64.cc | 2 +- - gcc/config/aarch64/aarch64.h | 56 ++++----- - 5 files changed, 109 insertions(+), 109 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 3dc020f0c..0461201a5 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] = - #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ - {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, - #include "config/aarch64/aarch64-cores.def" -- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8}, -+ {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, - {"", aarch64_no_arch, 0} - }; - -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index 6150448dc..c6bf7d82c 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -30,19 +30,19 @@ - Due to the assumptions about the positions of these fields in config.gcc, - the NAME should be kept as the first argument and FLAGS as the last. 
*/ - --AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8) --AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1) --AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) --AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) --AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) --AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) --AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) --AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7) --AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8) --AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R) --AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9) --AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1) --AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2) --AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3) -+AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) -+AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) -+AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) -+AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) -+AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) -+AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) -+AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) -+AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) -+AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) -+AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) -+AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) -+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) -+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) -+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) - - #undef AARCH64_ARCH -diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def -index 0402bfb74..c4038c641 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -46,132 +46,132 @@ - /* ARMv8-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) --AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) --AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) --AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) --AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) -+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) -+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) -+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) -+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) - - /* Cavium ('C') cores. 
*/ --AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) - /* Do not swap around "thunderxt88p1" and "thunderxt88", - this order is required to handle variant correctly. */ --AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) --AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) -+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) -+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) - - /* OcteonTX is the official name for T81/T83. 
*/ --AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) --AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - --AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. */ --AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. 
*/ --AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) -+AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) - - /* APM ('P') cores. */ --AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1) -+AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) --AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) - - /* Samsung ('S') cores. */ --AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) -+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) - - /* HXT ('h') cores. */ --AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) -+AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) - - /* ARMv8.1-A Architecture Processors. */ - - /* Broadcom ('B') cores. 
*/ --AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) --AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) -+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) - - /* ARMv8.2-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) --AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) --AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) --AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) --AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) --AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) 
--AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) --AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) --AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) --AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) --AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) --AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) -+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) -+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) -+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | 
AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) -+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) -+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) -+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) -+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) -+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) -+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) -+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) -+AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 
0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) --AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) --AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) -+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) -+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) - /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ --AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) --AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) --AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) --AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) -+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) -+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) -+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) - - /* Fujitsu ('F') cores. */ --AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) -+AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) - - /* HiSilicon ('H') cores. */ --AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) - - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). 
*/ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. */ --AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. */ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) --AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) --AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) --AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) -+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) -+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) - - /* ARM DynamIQ big.LITTLE configurations. 
*/ - --AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) --AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) -+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) -+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) - - /* Armv8-R Architecture Processors. */ --AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1) -+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) - - /* Armv9.0-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) -+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) - --AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) -+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) - --AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) -+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) - --AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) -+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) - --AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) --AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | 
AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) - - #undef AARCH64_CORE -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 254ecfaa2..3714c1047 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2949,7 +2949,7 @@ static const struct processor all_cores[] = - FLAGS, &COSTS##_tunings}, - #include "aarch64-cores.def" - {"generic", generic, cortexa53, AARCH64_ARCH_8A, -- AARCH64_FL_FOR_ARCH8, &generic_tunings}, -+ AARCH64_FL_FOR_V8A, &generic_tunings}, - {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 5a91dfdd2..918a14193 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -255,39 +255,39 @@ - #define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) - - /* Architecture flags that effect instruction selection. 
*/ --#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD) --#define AARCH64_FL_FOR_ARCH8_1 \ -- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \ -+#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD) -+#define AARCH64_FL_FOR_V8_1A \ -+ (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \ - | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) --#define AARCH64_FL_FOR_ARCH8_2 \ -- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A) --#define AARCH64_FL_FOR_ARCH8_3 \ -- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) --#define AARCH64_FL_FOR_ARCH8_4 \ -- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ -+#define AARCH64_FL_FOR_V8_2A \ -+ (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A) -+#define AARCH64_FL_FOR_V8_3A \ -+ (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) -+#define AARCH64_FL_FOR_V8_4A \ -+ (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ - | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) --#define AARCH64_FL_FOR_ARCH8_5 \ -- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \ -+#define AARCH64_FL_FOR_V8_5A \ -+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ - | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) --#define AARCH64_FL_FOR_ARCH8_6 \ -- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ -+#define AARCH64_FL_FOR_V8_6A \ -+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ - | AARCH64_FL_I8MM | AARCH64_FL_BF16) --#define AARCH64_FL_FOR_ARCH8_7 \ -- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64) --#define AARCH64_FL_FOR_ARCH8_8 \ -- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) -- --#define AARCH64_FL_FOR_ARCH8_R \ -- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R) --#define AARCH64_FL_FOR_ARCH9 \ -- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ -+#define AARCH64_FL_FOR_V8_7A \ -+ (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64) -+#define AARCH64_FL_FOR_V8_8A \ -+ (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | 
AARCH64_FL_MOPS) -+ -+#define AARCH64_FL_FOR_V8R \ -+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R) -+#define AARCH64_FL_FOR_V9A \ -+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ - | AARCH64_FL_F16) --#define AARCH64_FL_FOR_ARCH9_1 \ -- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A) --#define AARCH64_FL_FOR_ARCH9_2 \ -- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A) --#define AARCH64_FL_FOR_ARCH9_3 \ -- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A) -+#define AARCH64_FL_FOR_V9_1A \ -+ (AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A) -+#define AARCH64_FL_FOR_V9_2A \ -+ (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A) -+#define AARCH64_FL_FOR_V9_3A \ -+ (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A) - - /* Macros to test ISA flags. */ - --- -2.33.0 - diff --git a/0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch b/0111-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch similarity index 84% rename from 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch rename to 0111-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch index b02958e..d5e629b 100644 --- a/0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch +++ b/0111-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch @@ -1,14 +1,11 @@ -From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001 +From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:08 +0100 -Subject: [PATCH 092/157] [Backport][SME] aarch64: Rename hard_fp_offset to - bytes_above_hard_fp +Date: Tue, 12 Sep 2023 16:08:52 +0100 +Subject: [PATCH] aarch64: Rename hard_fp_offset to bytes_above_hard_fp MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 +Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit -Reference: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6 - Similarly to the previous locals_offset patch, hard_fp_offset was described as: @@ -17,7 +14,7 @@ was described as: STACK_BOUNDARY. */ poly_int64 hard_fp_offset; -which again took an “upside-down” view: higher offsets meant lower +which again took an “upside-down” view: higher offsets meant lower addresses. This patch renames the field to bytes_above_hard_fp instead. gcc/ @@ -33,10 +30,10 @@ gcc/ 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 992f71bbd..67199a026 100644 +index d4ec352ba98a..3c4052740e7a 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void) +@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void) + get_frame_size (), STACK_BOUNDARY / BITS_PER_UNIT); @@ -45,7 +42,7 @@ index 992f71bbd..67199a026 100644 = saved_regs_and_above - frame.below_hard_fp_saved_regs_size; /* Both these values are already aligned. */ -@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void) +@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void) else if (frame.wb_pop_candidate1 != INVALID_REGNUM) max_push_offset = 256; @@ -61,7 +58,7 @@ index 992f71bbd..67199a026 100644 { /* Simple, small frame with no data below the saved registers. -@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void) +@@ -8401,8 +8401,8 @@ aarch64_layout_frame (void) case that it hardly seems worth the effort though. 
*/ && (!saves_below_hard_fp_p || const_below_saved_regs == 0) && !(cfun->calls_alloca @@ -72,7 +69,7 @@ index 992f71bbd..67199a026 100644 { /* Frame with small area below the saved registers: -@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void) +@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void) sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size save SVE registers relative to SP sub sp, sp, bytes_below_saved_regs */ @@ -88,7 +85,7 @@ index 992f71bbd..67199a026 100644 { /* Frame with large area below the saved registers, or with SVE saves, but with a small area above: -@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void) +@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void) [sub sp, sp, below_hard_fp_saved_regs_size] [save SVE registers relative to SP] sub sp, sp, bytes_below_saved_regs */ @@ -97,7 +94,7 @@ index 992f71bbd..67199a026 100644 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; frame.final_adjust = frame.bytes_below_saved_regs; } -@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void) +@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void) [sub sp, sp, below_hard_fp_saved_regs_size] [save SVE registers relative to SP] sub sp, sp, bytes_below_saved_regs */ @@ -106,7 +103,7 @@ index 992f71bbd..67199a026 100644 frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; frame.final_adjust = frame.bytes_below_saved_regs; } -@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void) +@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void) { /* The offset of the frame chain record (if any) from the current SP. 
*/ poly_int64 chain_offset = (initial_adjust + callee_adjust @@ -115,7 +112,7 @@ index 992f71bbd..67199a026 100644 gcc_assert (known_ge (chain_offset, 0)); if (callee_adjust == 0) -@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) +@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) if (to == HARD_FRAME_POINTER_REGNUM) { if (from == ARG_POINTER_REGNUM) @@ -129,10 +126,10 @@ index 992f71bbd..67199a026 100644 if (to == STACK_POINTER_REGNUM) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 2acff9a96..0f7822c3d 100644 +index bf46e6124aa9..dd1f403f9393 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame +@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame STACK_BOUNDARY. */ poly_int64 bytes_above_locals; @@ -147,5 +144,5 @@ index 2acff9a96..0f7822c3d 100644 /* The size of the frame. This value is the offset from base of the frame (incomming SP) to the stack_pointer. This value is always -- -2.33.0 +2.43.5 diff --git a/0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch b/0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch deleted file mode 100644 index d1b1db8..0000000 --- a/0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch +++ /dev/null @@ -1,315 +0,0 @@ -From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:52 +0100 -Subject: [PATCH 013/157] [Backport][SME] aarch64: Add "V" to - aarch64-arches.def names - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1 - -This patch completes the renaming of architecture-level related -things by adding "V" to the name of the architecture in -aarch64-arches.def. Since the "V" is predictable, we can easily -drop it when we don't need it (as when matching /proc/cpuinfo). 
- -Having a valid C identifier is necessary for later patches. - -gcc/ - * config/aarch64/aarch64-arches.def: Add a leading "V" to the - ARCH_IDENT fields. - * config/aarch64/aarch64-cores.def: Update accordingly. - * common/config/aarch64/aarch64-common.cc (all_cores): Likewise. - * config/aarch64/aarch64.cc (all_cores): Likewise. - * config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the - leading "V". ---- - gcc/common/config/aarch64/aarch64-common.cc | 2 +- - gcc/config/aarch64/aarch64-arches.def | 28 ++--- - gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- - gcc/config/aarch64/aarch64.cc | 2 +- - gcc/config/aarch64/driver-aarch64.cc | 3 +- - 5 files changed, 83 insertions(+), 82 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 0461201a5..6ca89d31f 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] = - #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ - {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, - #include "config/aarch64/aarch64-cores.def" -- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A}, -+ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, - {"", aarch64_no_arch, 0} - }; - -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index c6bf7d82c..e42202822 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -30,19 +30,19 @@ - Due to the assumptions about the positions of these fields in config.gcc, - the NAME should be kept as the first argument and FLAGS as the last. 
*/ - --AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A) --AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A) --AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A) --AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A) --AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A) --AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A) --AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A) --AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A) --AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A) --AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R) --AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A) --AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A) --AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A) --AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A) -+AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) -+AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) -+AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) -+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) -+AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) -+AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) -+AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) -+AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) -+AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) -+AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) -+AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) -+AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) -+AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) -+AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) - - #undef AARCH64_ARCH -diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def -index c4038c641..f4c2f4ea4 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -46,132 +46,132 @@ - /* ARMv8-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) --AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) --AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) --AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) --AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) -+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) -+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) -+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) -+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) - - /* Cavium ('C') cores. 
*/ --AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) - /* Do not swap around "thunderxt88p1" and "thunderxt88", - this order is required to handle variant correctly. */ --AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) --AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) -+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) -+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) - - /* OcteonTX is the official name for T81/T83. 
*/ --AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) --AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - --AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. */ --AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. 
*/ --AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) -+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) - - /* APM ('P') cores. */ --AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) -+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) --AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) - - /* Samsung ('S') cores. */ --AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) -+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) - - /* HXT ('h') cores. */ --AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) -+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) - - /* ARMv8.1-A Architecture Processors. */ - - /* Broadcom ('B') cores. 
*/ --AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) --AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) -+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) - - /* ARMv8.2-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) --AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) --AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) --AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) --AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) --AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) 
--AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) --AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) --AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) --AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) --AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) --AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) -+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) -+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) -+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | 
AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) -+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) -+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) -+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) -+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) -+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) -+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) -+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) -+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, 
neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) --AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) --AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) -+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) -+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) - /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ --AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) --AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) --AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) --AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) -+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) -+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) -+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) - - /* Fujitsu ('F') cores. */ --AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) -+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) - - /* HiSilicon ('H') cores. */ --AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) - - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). 
*/ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. */ --AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | 
AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. */ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) --AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) --AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) --AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) -+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) -+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) - - /* ARM DynamIQ big.LITTLE configurations. 
*/ - --AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) --AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) -+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) -+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) - - /* Armv8-R Architecture Processors. */ --AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) -+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) - - /* Armv9.0-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) -+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) - --AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) -+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) - --AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) -+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) - --AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) -+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) - --AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) --AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | 
AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) - - #undef AARCH64_CORE -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3714c1047..22b51e12f 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2948,7 +2948,7 @@ static const struct processor all_cores[] = - {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ - FLAGS, &COSTS##_tunings}, - #include "aarch64-cores.def" -- {"generic", generic, cortexa53, AARCH64_ARCH_8A, -+ {"generic", generic, cortexa53, AARCH64_ARCH_V8A, - AARCH64_FL_FOR_V8A, &generic_tunings}, - {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; -diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc -index d714a8bda..644780ef2 100644 ---- a/gcc/config/aarch64/driver-aarch64.cc -+++ b/gcc/config/aarch64/driver-aarch64.cc -@@ -78,8 +78,9 @@ struct aarch64_arch_driver_info - const uint64_t flags; - }; - -+/* Skip the leading "V" in the architecture name. 
*/ - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ -- { #ARCH_IDENT, NAME, FLAGS }, -+ { #ARCH_IDENT + 1, NAME, FLAGS }, - - static struct aarch64_arch_driver_info aarch64_arches[] = - { --- -2.33.0 - diff --git a/0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch b/0112-aarch64-Tweak-frame-size-comment.patch similarity index 67% rename from 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch rename to 0112-aarch64-Tweak-frame-size-comment.patch index def17bb..52abf94 100644 --- a/0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch +++ b/0112-aarch64-Tweak-frame-size-comment.patch @@ -1,15 +1,13 @@ -From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001 +From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:08 +0100 -Subject: [PATCH 093/157] [Backport][SME] aarch64: Tweak frame_size comment +Date: Tue, 12 Sep 2023 16:08:52 +0100 +Subject: [PATCH] aarch64: Tweak frame_size comment MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 +Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3 - This patch fixes another case in which a value was described with -an “upside-down” view. +an “upside-down” view. gcc/ * config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment. @@ -18,10 +16,10 @@ gcc/ 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 0f7822c3d..39abca051 100644 +index dd1f403f9393..700524ae22bf 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame +@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame STACK_BOUNDARY. 
*/ poly_int64 bytes_above_hard_fp; @@ -33,5 +31,5 @@ index 0f7822c3d..39abca051 100644 poly_int64 frame_size; -- -2.33.0 +2.43.5 diff --git a/0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch b/0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch deleted file mode 100644 index 1b14c18..0000000 --- a/0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch +++ /dev/null @@ -1,55 +0,0 @@ -From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:52 +0100 -Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e - -The aarch64-option-extensions.def parsing in config.gcc had -some code left over from when it tried to parse the whole -macro definition. Also, config.gcc now only looks at the -first fields of the aarch64-arches.def entries. - -gcc/ - * config.gcc: Remove dead aarch64-option-extensions.def code. - * config/aarch64/aarch64-arches.def: Update comment. ---- - gcc/config.gcc | 8 -------- - gcc/config/aarch64/aarch64-arches.def | 2 +- - 2 files changed, 1 insertion(+), 9 deletions(-) - -diff --git a/gcc/config.gcc b/gcc/config.gcc -index 3be450471..da66603cd 100644 ---- a/gcc/config.gcc -+++ b/gcc/config.gcc -@@ -4210,14 +4210,6 @@ case "${target}" in - options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \ - ${srcdir}/config/aarch64/aarch64-option-extensions.def`" - -- # Match one element inside AARCH64_OPT_EXTENSION, we -- # consume anything that's not a ,. -- elem="[ ]*\([^,]\+\)[ ]*" -- -- # Repeat the pattern for the number of entries in the -- # AARCH64_OPT_EXTENSION, currently 6 times. 
-- sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem" -- - while [ x"$ext_val" != x ] - do - ext_val=`echo $ext_val | sed -e 's/\+//'` -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index e42202822..ece96e22a 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -28,7 +28,7 @@ - ARCH_REV is an integer specifying the architecture major revision. - FLAGS are the flags implied by the architecture. - Due to the assumptions about the positions of these fields in config.gcc, -- the NAME should be kept as the first argument and FLAGS as the last. */ -+ NAME should be kept as the first argument. */ - - AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) - AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) --- -2.33.0 - diff --git a/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch b/0113-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch similarity index 86% rename from 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch rename to 0113-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch index c036731..32321ef 100644 --- a/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch +++ b/0113-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch @@ -1,10 +1,7 @@ -From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001 +From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:09 +0100 -Subject: [PATCH 094/157] [Backport][SME] aarch64: Measure reg_offset from the - bottom of the frame - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54 +Date: Tue, 12 Sep 2023 16:08:53 +0100 +Subject: [PATCH] aarch64: Measure reg_offset from the bottom of the frame reg_offset was measured from the bottom of the saved register area. 
This made perfect sense with the original layout, since the bottom @@ -34,10 +31,10 @@ gcc/ 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 67199a026..df8a83b04 100644 +index 3c4052740e7a..97dd077844b4 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void) +@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void) static void aarch64_layout_frame (void) { @@ -45,7 +42,7 @@ index 67199a026..df8a83b04 100644 int regno, last_fp_reg = INVALID_REGNUM; machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); -@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void) +@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void) gcc_assert (crtl->is_leaf || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); @@ -56,7 +53,7 @@ index 67199a026..df8a83b04 100644 /* Now assign stack slots for the registers. Start with the predicate registers, since predicate LDR and STR have a relatively small -@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void) +@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void) offset += BYTES_PER_SVE_PRED; } @@ -66,7 +63,7 @@ index 67199a026..df8a83b04 100644 { /* If we have any vector registers to save above the predicate registers, the offset of the vector register save slots need to be a multiple -@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void) +@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void) offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); else { @@ -81,7 +78,7 @@ index 67199a026..df8a83b04 100644 else gcc_unreachable (); } -@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void) +@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void) /* OFFSET is now the offset of the hard frame pointer from the bottom of the callee save area. 
*/ @@ -95,7 +92,7 @@ index 67199a026..df8a83b04 100644 if (frame.emit_frame_chain) { /* FP and LR are placed in the linkage record. */ -@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void) +@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void) offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); @@ -108,7 +105,7 @@ index 67199a026..df8a83b04 100644 poly_int64 saved_regs_and_above = aligned_upper_bound (varargs_and_saved_regs_size -@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, +@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); @@ -119,7 +116,7 @@ index 67199a026..df8a83b04 100644 rtx base_rtx = stack_pointer_rtx; poly_int64 sp_offset = offset; -@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, +@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, machine_mode mode = aarch64_reg_save_mode (regno); reg = gen_rtx_REG (mode, regno); @@ -130,7 +127,7 @@ index 67199a026..df8a83b04 100644 rtx base_rtx = stack_pointer_rtx; if (mode == VNx2DImode && BYTES_BIG_ENDIAN) aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void) +@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void) it as a stack probe for -fstack-clash-protection. */ if (flag_stack_clash_protection && maybe_ne (frame.below_hard_fp_saved_regs_size, 0) @@ -147,7 +144,7 @@ index 67199a026..df8a83b04 100644 /* Check that we can access the stack slot of the register with one direct load with no adjustments needed. 
*/ -@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) rtx reg = gen_rtx_REG (mode, regno); poly_int64 offset = frame.reg_offset[regno]; if (frame_pointer_needed) @@ -158,7 +155,7 @@ index 67199a026..df8a83b04 100644 rtx addr = plus_constant (Pmode, ptr_reg, offset); rtx mem = gen_frame_mem (mode, addr); -@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) +@@ -9248,9 +9244,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) /* REGNO2 can be saved/restored in a pair with REGNO. */ rtx reg2 = gen_rtx_REG (mode, regno2); if (frame_pointer_needed) @@ -169,7 +166,7 @@ index 67199a026..df8a83b04 100644 rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); rtx mem2 = gen_frame_mem (mode, addr2); rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2) -@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, if (final_adjustment_p && known_eq (frame.below_hard_fp_saved_regs_size, 0)) { @@ -180,10 +177,10 @@ index 67199a026..df8a83b04 100644 min_probe_threshold -= lr_offset.to_constant (); else diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 39abca051..f340237d0 100644 +index 700524ae22bf..b61358370732 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune; +@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune; #ifdef HAVE_POLY_INT_H struct GTY (()) aarch64_frame { @@ -194,5 +191,5 @@ index 39abca051..f340237d0 100644 /* The number of extra stack bytes taken up by register varargs. 
-- -2.33.0 +2.43.5 diff --git a/0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch b/0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch deleted file mode 100644 index ba9f2ab..0000000 --- a/0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch +++ /dev/null @@ -1,273 +0,0 @@ -From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:53 +0100 -Subject: [PATCH 015/157] [Backport][SME] aarch64: Avoid redundancy in - aarch64-cores.def - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c - -The flags fields of the aarch64-cores.def always start with -AARCH64_FL_FOR_. After previous changes, is always -identical to the previous field, so we can drop the explicit -AARCH64_FL_FOR_ and derive it programmatically. - -This isn't a big saving in itself, but it helps with later patches. - -gcc/ - * config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_ - from the flags field. - * common/config/aarch64/aarch64-common.cc (all_cores): Add it - here instead. - * config/aarch64/aarch64.cc (all_cores): Likewise. - * config/aarch64/driver-aarch64.cc (all_cores): Likewise. 
---- - gcc/common/config/aarch64/aarch64-common.cc | 2 +- - gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++---------- - gcc/config/aarch64/aarch64.cc | 2 +- - gcc/config/aarch64/driver-aarch64.cc | 2 +- - 4 files changed, 68 insertions(+), 68 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 6ca89d31f..a965ac660 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -251,7 +251,7 @@ struct arch_to_arch_name - static const struct processor_name_to_arch all_cores[] = - { - #define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ -- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS}, -+ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, - #include "config/aarch64/aarch64-cores.def" - {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, - {"", aarch64_no_arch, 0} -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index f4c2f4ea4..008b0b8c1 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -46,132 +46,132 @@ - /* ARMv8-A Architecture Processors. */ - - /* ARM ('A') cores. 
*/ --AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) --AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) --AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) --AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) --AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) -+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) -+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) -+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) -+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) - /* Do not swap around "thunderxt88p1" and "thunderxt88", - this order is required to handle variant correctly. 
*/ --AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) --AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) -+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) -+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) - - /* OcteonTX is the official name for T81/T83. */ --AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) --AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - --AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. 
*/ --AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. */ --AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) -+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) - - /* APM ('P') cores. */ --AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1) -+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) --AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) - - /* Samsung ('S') cores. */ --AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) -+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) - - /* HXT ('h') cores. */ --AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) -+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) - - /* ARMv8.1-A Architecture Processors. */ - - /* Broadcom ('B') cores. 
*/ --AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) --AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) -+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) - - /* ARMv8.2-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) --AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) --AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) --AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) --AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) --AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) --AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, 
AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) --AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) --AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) --AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) --AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) --AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) -+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) -+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) -+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, 
AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) -+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) -+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) -+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) -+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) -+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) -+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) -+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) -+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) - - /* Cavium ('C') cores. 
*/ --AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) --AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) --AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) -+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) -+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) - /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */ --AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) --AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) --AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) --AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) -+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) -+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) -+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, 
AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) - - /* Fujitsu ('F') cores. */ --AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) -+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) - - /* HiSilicon ('H') cores. */ --AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) - - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). */ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. 
*/ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) --AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) --AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) --AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) -+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) -+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) - - /* ARM DynamIQ big.LITTLE configurations. 
*/ - --AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) --AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) -+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) -+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) - - /* Armv8-R Architecture Processors. */ --AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1) -+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1) - - /* Armv9.0-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) -+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) - --AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) -+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) - --AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) -+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) - --AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) -+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) - --AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) --AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("demeter", demeter, cortexa57, 
V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) - - #undef AARCH64_CORE -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 22b51e12f..f975aad07 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2946,7 +2946,7 @@ static const struct processor all_cores[] = - { - #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ - {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ -- FLAGS, &COSTS##_tunings}, -+ AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings}, - #include "aarch64-cores.def" - {"generic", generic, cortexa53, AARCH64_ARCH_V8A, - AARCH64_FL_FOR_V8A, &generic_tunings}, -diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc -index 644780ef2..97690de62 100644 ---- a/gcc/config/aarch64/driver-aarch64.cc -+++ b/gcc/config/aarch64/driver-aarch64.cc -@@ -62,7 +62,7 @@ struct aarch64_core_data - #define DEFAULT_ARCH "8A" - - #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ -- { CORE_NAME, #ARCH, IMP, PART, VARIANT, FLAGS }, -+ { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS }, - - static struct aarch64_core_data aarch64_cpu_data[] = - { --- -2.33.0 - diff --git a/0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch b/0114-aarch64-Simplify-top-of-frame-allocation.patch similarity index 82% rename from 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch rename to 0114-aarch64-Simplify-top-of-frame-allocation.patch index f671841..ac0da4f 100644 --- a/0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch +++ 
b/0114-aarch64-Simplify-top-of-frame-allocation.patch @@ -1,10 +1,7 @@ -From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001 +From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:09 +0100 -Subject: [PATCH 095/157] [Backport][SME] aarch64: Simplify top of frame - allocation - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1 +Date: Tue, 12 Sep 2023 16:08:53 +0100 +Subject: [PATCH] aarch64: Simplify top of frame allocation After previous patches, it no longer really makes sense to allocate the top of the frame in terms of varargs_and_saved_regs_size and @@ -18,10 +15,10 @@ gcc/ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index df8a83b04..3329aa364 100644 +index 97dd077844b4..81935852d5b2 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void) +@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void) frame.saved_regs_size = offset - frame.bytes_below_saved_regs; @@ -54,5 +51,5 @@ index df8a83b04..3329aa364 100644 frame.initial_adjust = 0; frame.final_adjust = 0; -- -2.33.0 +2.43.5 diff --git a/0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch b/0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch deleted file mode 100644 index f65a31b..0000000 --- a/0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch +++ /dev/null @@ -1,83 +0,0 @@ -From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:53 +0100 -Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4 - [PR107025] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957 - -AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated -entry in 
aarch64-option-extensions.def. This means that, although -it is internally separated from AARCH64_FL_V8_4A, there is no -mechanism for turning it on and off individually, independently -of armv8.4-a. - -The only place that the flag was used independently was in the -entry for thunderx3t110, which enabled it alongside V8_3A. -As noted in PR107025, this means that any use of the extension -will fail to assemble. - -In the PR trail, Andrew suggested removing the core entry. -That might be best long-term, but since the barrier for removing -command-line options without a deprecation period is very high, -this patch instead just drops the flag from the core entry. -We'll still produce correct code. - -gcc/ - PR target/107025 - * config/aarch64/aarch64.h (oAARCH64_FL_RCPC8_4): Delete. - (AARCH64_FL_FOR_V8_4A): Update accordingly. - (AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly. - * config/aarch64/aarch64-cores.def (thunderx3t110): Remove - AARCH64_FL_RCPC8_4. ---- - gcc/config/aarch64/aarch64-cores.def | 2 +- - gcc/config/aarch64/aarch64.h | 5 ++--- - 2 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 008b0b8c1..cf500d0a9 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). */ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. 
*/ - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 918a14193..f4e0cd148 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -173,7 +173,6 @@ - #define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ - #define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ - #define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ --#define AARCH64_FL_RCPC8_4 (1 << 20) /* Has ARMv8.4-a RCPC extensions. */ - - /* Statistical Profiling extensions. */ - #define AARCH64_FL_PROFILE (1 << 21) -@@ -265,7 +264,7 @@ - (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) - #define AARCH64_FL_FOR_V8_4A \ - (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ -- | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM) -+ | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM) - #define AARCH64_FL_FOR_V8_5A \ - (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ - | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) -@@ -313,7 +312,7 @@ - #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) --#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) -+#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) - #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) - #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) - #define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) --- -2.33.0 - diff --git a/0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch b/0115-aarch64-Minor-initial-adjustment-tweak.patch similarity index 75% rename from 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch rename to 0115-aarch64-Minor-initial-adjustment-tweak.patch index d962cb9..8f282c5 100644 --- a/0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch +++ 
b/0115-aarch64-Minor-initial-adjustment-tweak.patch @@ -1,10 +1,7 @@ -From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001 +From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:10 +0100 -Subject: [PATCH 096/157] [Backport][SME] aarch64: Minor initial adjustment - tweak - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c +Date: Tue, 12 Sep 2023 16:08:54 +0100 +Subject: [PATCH] aarch64: Minor initial adjustment tweak This patch just changes a calculation of initial_adjust to one that makes it slightly more obvious that the total @@ -19,10 +16,10 @@ gcc/ 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3329aa364..72604dd9d 100644 +index 81935852d5b2..4d9fcf3d1623 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void) +@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void) { /* Frame in which all saves are SVE saves: @@ -37,5 +34,5 @@ index 3329aa364..72604dd9d 100644 } else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) -- -2.33.0 +2.43.5 diff --git a/0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch b/0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch deleted file mode 100644 index ed2a375..0000000 --- a/0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch +++ /dev/null @@ -1,154 +0,0 @@ -From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:53 +0100 -Subject: [PATCH 017/157] [Backport][SME] aarch64: Fix transitive closure of - features - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1 - -aarch64-option-extensions.def requires us to maintain the transitive -closure of options by hand. 
This patch fixes a few cases where a -flag was missed. - -+noaes and +nosha2 now disable +crypto, which IMO makes more -sense and is consistent with the Clang behaviour. - -gcc/ - * config/aarch64/aarch64-option-extensions.def (dotprod): Depend - on fp as well as simd. - (sha3): Likewise. - (aes): Likewise. Make +noaes disable crypto. - (sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and - sve2-sha3. - (sve2-sha3): Depend on sha2 as well as sha3. - -gcc/testsuite/ - * gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to - disable crypto but keep aes. - * gcc.target/aarch64/pragma_cpp_predefs_4.c: New test. ---- - .../aarch64/aarch64-option-extensions.def | 16 ++++--- - .../gcc.target/aarch64/options_set_6.c | 5 +- - .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++ - 3 files changed, 58 insertions(+), 10 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c - -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index b4d0ac8b6..b98008127 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ - - /* Enabling "dotprod" also enables "simd". - Disabling "dotprod" only disables "dotprod". */ --AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ -+AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ - false, "asimddp") - - /* Enabling "aes" also enables "simd". - Disabling "aes" disables "aes" and "sve2-aes'. */ --AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ -- AARCH64_FL_SVE2_AES, false, "aes") -+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ -+ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") - - /* Enabling "sha2" also enables "simd". - Disabling "sha2" just disables "sha2". 
*/ --AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ -- "sha1 sha2") -+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ -+ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") - - /* Enabling "sha3" enables "simd" and "sha2". - Disabling "sha3" disables "sha3" and "sve2-sha3". */ --AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ -+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ - AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ - "sha3 sha512") - - /* Enabling "sm4" also enables "simd". - Disabling "sm4" disables "sm4" and "sve2-sm4". */ --AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ -+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ - AARCH64_FL_SVE2_SM4, false, "sm3 sm4") - - /* Enabling "fp16fml" also enables "fp" and "fp16". -@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ - /* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and - "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ - AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SHA2 | \ - AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ - AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") - -diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c -index 90a055928..2a1d7fe5b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c -+++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c -@@ -6,7 +6,6 @@ int main () - return 0; - } - --/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ -+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */ - --/* Group as a whole was requested to be turned on, crypto itself is a bit and so -- just turning off one feature can't turn it off. 
*/ -+/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c -new file mode 100644 -index 000000000..0e6461fa4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c -@@ -0,0 +1,47 @@ -+#pragma GCC target "+nothing+dotprod" -+#ifndef __ARM_FEATURE_FMA -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+aes" -+#ifndef __ARM_FEATURE_FMA -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+sha2" -+#ifndef __ARM_FEATURE_FMA -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+sha3" -+#ifndef __ARM_FEATURE_FMA -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+sm4" -+#ifndef __ARM_FEATURE_FMA -+#error Foo -+#endif -+ -+#pragma GCC target "+crypto+noaes" -+#ifdef __ARM_FEATURE_CRYPTO -+#error Foo -+#endif -+ -+#pragma GCC target "+crypto+nosha2" -+#ifdef __ARM_FEATURE_CRYPTO -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+sve2-sha3" -+#ifndef __ARM_FEATURE_SHA2 -+#error Foo -+#endif -+ -+#pragma GCC target "+sve2-sha3+nosha2" -+#ifdef __ARM_FEATURE_SHA3 -+#error Foo -+#endif -+#ifdef __ARM_FEATURE_SVE2_SHA3 -+#error Foo -+#endif --- -2.33.0 - diff --git a/0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch b/0116-aarch64-Tweak-stack-clash-boundary-condition.patch similarity index 90% rename from 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch rename to 0116-aarch64-Tweak-stack-clash-boundary-condition.patch index 6199468..8599615 100644 --- a/0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch +++ b/0116-aarch64-Tweak-stack-clash-boundary-condition.patch @@ -1,10 +1,7 @@ -From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001 +From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:10 +0100 -Subject: [PATCH 097/157] [Backport][SME] aarch64: Tweak stack clash 
boundary - condition - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966 +Date: Tue, 12 Sep 2023 16:08:54 +0100 +Subject: [PATCH] aarch64: Tweak stack clash boundary condition The AArch64 ABI says that, when stack clash protection is used, there can be a maximum of 1KiB of unprobed space at sp on entry @@ -46,10 +43,10 @@ gcc/testsuite/ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 72604dd9d..ba92a23a7 100644 +index 4d9fcf3d1623..34c1d8614cd9 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, HOST_WIDE_INT guard_size = 1 << param_stack_clash_protection_guard_size; HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; @@ -64,7 +61,7 @@ index 72604dd9d..ba92a23a7 100644 account any unprobed space there is above the current SP. 
There are diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c new file mode 100644 -index 000000000..0d8a25d73 +index 000000000000..0d8a25d73a24 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c @@ -0,0 +1,55 @@ @@ -124,5 +121,5 @@ index 000000000..0d8a25d73 + return 1; +} -- -2.33.0 +2.43.5 diff --git a/0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch b/0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch deleted file mode 100644 index f4ef844..0000000 --- a/0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:54 +0100 -Subject: [PATCH 018/157] [Backport][SME] aarch64: Reorder an entry in - aarch64-option-extensions.def - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766 - -aarch64-option-extensions.def was topologically sorted except -for one case: crypto came before its aes and sha2 dependencies. -This patch moves crypto after sha2 instead. - -gcc/ - * config/aarch64/aarch64-option-extensions.def: Move crypto - after sha2. - -gcc/testsuite/ - * gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto - to come after +crc. - * gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise. - * gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise. - * gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise. - * gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise. - * gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise. - * gcc.target/aarch64/options_set_2.c: Likewise. - * gcc.target/aarch64/options_set_3.c: Likewise. - * gcc.target/aarch64/options_set_4.c: Likewise. 
---- - .../aarch64/aarch64-option-extensions.def | 20 +++++++++---------- - .../aarch64/cpunative/native_cpu_0.c | 2 +- - .../aarch64/cpunative/native_cpu_13.c | 2 +- - .../aarch64/cpunative/native_cpu_16.c | 2 +- - .../aarch64/cpunative/native_cpu_17.c | 2 +- - .../aarch64/cpunative/native_cpu_6.c | 2 +- - .../aarch64/cpunative/native_cpu_7.c | 2 +- - .../gcc.target/aarch64/options_set_2.c | 2 +- - .../gcc.target/aarch64/options_set_3.c | 2 +- - .../gcc.target/aarch64/options_set_4.c | 4 ++-- - 10 files changed, 20 insertions(+), 20 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index b98008127..df2c8d19b 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ - AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ - false, "asimd") - --/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". -- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", -- "sve2-aes", "sve2-sha3", "sve2-sm4". */ --AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ -- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ -- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ -- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ -- "aes pmull sha1 sha2") -- - /* Enabling or disabling "crc" only changes "crc". */ - AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") - -@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ - AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ - AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") - -+/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". -+ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", -+ "sve2-aes", "sve2-sha3", "sve2-sm4". 
*/ -+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ -+ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ -+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ -+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ -+ "aes pmull sha1 sha2") -+ - /* Enabling "sha3" enables "simd" and "sha2". - Disabling "sha3" disables "sha3" and "sve2-sha3". */ - AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c -index f155f51ba..8499f87c3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ - - /* Test a normal looking procinfo. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c -index b7b3a8e13..551669091 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */ - - /* Test one with mixed order of feature bits. 
*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c -index a424e7c56..2f963bb23 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ - - /* Test a normal looking procinfo. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c -index c269c5fef..c68a697aa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */ - - /* Test a normal looking procinfo. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c -index da72052e6..7608e8845 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c -@@ -7,7 +7,7 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ - - /* Test one where the feature bits for crypto and fp16 are given in - same order as declared in options file. 
*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c -index 96ad4c14d..72b14b4f6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c -@@ -7,7 +7,7 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */ - - /* Test one where the crypto and fp16 options are specified in different - order from what is in the options file. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c -index 3476febce..f82cb5f78 100644 ---- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c -@@ -6,6 +6,6 @@ int main () - return 0; - } - --/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ -+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ - - /* Check to see if crc and crypto are maintained if crypto specified. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c -index 4558339f1..7d350cfa3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c -@@ -6,6 +6,6 @@ int main () - return 0; - } - --/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ -+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ - - /* Check if smallest set is maintained when outputting. 
*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c -index 15514bfe9..5370e02e1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c -@@ -6,7 +6,7 @@ int main () - return 0; - } - --/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */ -+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */ - - /* Check if individual bits that make up a grouping is specified that only the -- grouping is kept. */ -\ No newline at end of file -+ grouping is kept. */ --- -2.33.0 - diff --git a/0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch b/0117-aarch64-Put-LR-save-probe-in-first-16-bytes.patch similarity index 95% rename from 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch rename to 0117-aarch64-Put-LR-save-probe-in-first-16-bytes.patch index c848e19..151eae4 100644 --- a/0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch +++ b/0117-aarch64-Put-LR-save-probe-in-first-16-bytes.patch @@ -1,10 +1,7 @@ -From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001 +From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:11 +0100 -Subject: [PATCH 098/157] [Backport][SME] aarch64: Put LR save probe in first - 16 bytes - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c +Date: Tue, 12 Sep 2023 16:08:55 +0100 +Subject: [PATCH] aarch64: Put LR save probe in first 16 bytes -fstack-clash-protection uses the save of LR as a probe for the next allocation. 
The next allocation could be: @@ -73,10 +70,10 @@ gcc/testsuite/ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index ba92a23a7..1ba4c2f89 100644 +index 34c1d8614cd9..16433fb70f4f 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void) +@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void) bool saves_below_hard_fp_p = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); frame.bytes_below_hard_fp = offset; @@ -124,7 +121,7 @@ index ba92a23a7..1ba4c2f89 100644 poly_int64 max_int_offset = offset; offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void) +@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void) max_push_offset to 0, because no registers are popped at this time, so callee_adjust cannot be adjusted. */ HOST_WIDE_INT max_push_offset = 0; @@ -142,7 +139,7 @@ index ba92a23a7..1ba4c2f89 100644 HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; HOST_WIDE_INT const_saved_regs_size; -@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, = (final_adjustment_p ? guard_used_by_caller + byte_sp_alignment : guard_size - guard_used_by_caller); @@ -172,7 +169,7 @@ index ba92a23a7..1ba4c2f89 100644 poly_int64 frame_size = frame.frame_size; /* We should always have a positive probe threshold. */ -@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, if (final_adjustment_p && rounded_size != 0) min_probe_threshold = 0; /* If doing a small final adjustment, we always probe at offset 0. 
@@ -185,7 +182,7 @@ index ba92a23a7..1ba4c2f89 100644 diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c new file mode 100644 -index 000000000..82447d20f +index 000000000000..82447d20fff5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c @@ -0,0 +1,100 @@ @@ -291,7 +288,7 @@ index 000000000..82447d20f +} diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c new file mode 100644 -index 000000000..73ac3e4e4 +index 000000000000..73ac3e4e4eb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c @@ -0,0 +1,100 @@ @@ -397,7 +394,7 @@ index 000000000..73ac3e4e4 +} diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c new file mode 100644 -index 000000000..690aae8df +index 000000000000..690aae8dfd5b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c @@ -0,0 +1,3 @@ @@ -405,5 +402,5 @@ index 000000000..690aae8df + +#include "stack-check-prologue-19.c" -- -2.33.0 +2.43.5 diff --git a/0118-Backport-SME-aarch64-Simplify-feature-definitions.patch b/0118-Backport-SME-aarch64-Simplify-feature-definitions.patch deleted file mode 100644 index ffff47d..0000000 --- a/0118-Backport-SME-aarch64-Simplify-feature-definitions.patch +++ /dev/null @@ -1,1176 +0,0 @@ -From deb18d5083d8f9edbdafac184c010a6720dc8dda Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:54 +0100 -Subject: [PATCH 019/157] [Backport][SME] aarch64: Simplify feature definitions - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11a113d501ff64fa4843e28d0a21b3f4e9d0d3de - -Currently the aarch64-option-extensions.def entries, the -aarch64-cores.def entries, and the AARCH64_FL_FOR_* macros -have a transitive closure of dependencies that is maintained by hand. 
-This is a bit error-prone and is becoming less tenable as more features -are added. The main point of this patch is to maintain the closure -automatically instead. - -For example, the +sve2-aes extension requires sve2 and aes. -This is now described using: - - AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), ...) - -If life was simple, we could just give the name of the feature -and the list of features that it requires/depends on. But sadly -things are more complicated. For example: - -- the legacy +crypto option enables aes and sha2 only, but +nocrypto - disables all crypto-related extensions, including sm4. - -- +fp16fml enables fp16, but armv8.4-a enables fp16fml without fp16. - fp16fml only has an effect when fp16 is also present; see the - comments for more details. - -- +bf16 enables simd, but +bf16+nosimd is valid and enables just the - scalar bf16 instructions. rdma behaves similarly. - -To handle cases like these, the option entries have extra fields to -specify what an explicit +foo enables and what an explicit +nofoo -disables, in addition to the absolute dependencies. - -The other main changes are: - -- AARCH64_FL_* are now defined automatically. - -- the feature list for each architecture level moves from aarch64.h - to aarch64-arches.def. - -As a consequence, we now have a (redundant) V8A feature flag. - -While there, the patch uses a new typedef, aarch64_feature_flags, -for the set of feature flags. This should make it easier to switch -to a class if we run out of bits in the uint64_t. - -For now the patch hardcodes the fact that crypto is the only -synthetic option. A later patch will remove this field. 
- -To test for things that might not be covered by the testsuite, -I made the driver print out the all_extensions, all_cores and -all_archs arrays before and after the patch, with the following -tweaks: - -- renumber the old AARCH64_FL_* bit assignments to match the .def order -- remove the new V8A flag when printing the new tables -- treat CRYPTO and CRYPTO | AES | SHA2 the same way when printing the - core tables - -(On the last point: some cores enabled just CRYPTO while others enabled -CRYPTO, AES and SHA2. This doesn't cause a difference in behaviour -because of how the dependent macros are defined. With the new scheme, -all entries with CRYPTO automatically get AES and SHA2 too.) - -The only difference is that +nofp now turns off dotprod. This was -another instance of an incomplete transitive closure, but unlike the -instances fixed in a previous patch, it had no observable effect. - -gcc/ - * config/aarch64/aarch64-option-extensions.def: Switch to a new format. - * config/aarch64/aarch64-cores.def: Use the same format to specify - lists of features. - * config/aarch64/aarch64-arches.def: Likewise, moving that information - from aarch64.h. - * config/aarch64/aarch64-opts.h (aarch64_feature_flags): New typedef. - * config/aarch64/aarch64.h (aarch64_feature): New class enum. - Turn AARCH64_FL_* macros into constexprs, getting the definitions - from aarch64-option-extensions.def. Remove AARCH64_FL_FOR_* macros. - * common/config/aarch64/aarch64-common.cc: Include - aarch64-feature-deps.h. - (all_extensions): Update for new .def format. - (all_extensions_by_on, all_cores, all_architectures): Likewise. - * config/aarch64/driver-aarch64.cc: Include aarch64-feature-deps.h. - (aarch64_extensions): Update for new .def format. - (aarch64_cpu_data, aarch64_arches): Likewise. - * config/aarch64/aarch64.cc: Include aarch64-feature-deps.h. - (all_architectures, all_cores): Update for new .def format. 
- * config/aarch64/aarch64-sve-builtins.cc - (check_required_extensions): Likewise. ---- - gcc/common/config/aarch64/aarch64-common.cc | 29 +- - gcc/config/aarch64/aarch64-arches.def | 28 +- - gcc/config/aarch64/aarch64-cores.def | 130 +++---- - gcc/config/aarch64/aarch64-feature-deps.h | 121 +++++++ - .../aarch64/aarch64-option-extensions.def | 323 +++++++----------- - gcc/config/aarch64/aarch64-opts.h | 4 + - gcc/config/aarch64/aarch64-sve-builtins.cc | 5 +- - gcc/config/aarch64/aarch64.cc | 14 +- - gcc/config/aarch64/aarch64.h | 164 ++------- - gcc/config/aarch64/driver-aarch64.cc | 10 +- - 10 files changed, 374 insertions(+), 454 deletions(-) - create mode 100644 gcc/config/aarch64/aarch64-feature-deps.h - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index a965ac660..74729bb30 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -30,6 +30,7 @@ - #include "opts.h" - #include "flags.h" - #include "diagnostic.h" -+#include "config/aarch64/aarch64-feature-deps.h" - - #ifdef TARGET_BIG_ENDIAN_DEFAULT - #undef TARGET_DEFAULT_TARGET_FLAGS -@@ -214,9 +215,12 @@ struct aarch64_option_extension - /* ISA extensions in AArch64. */ - static const struct aarch64_option_extension all_extensions[] = - { --#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ -- SYNTHETIC, Z) \ -- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, -+#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ -+ {NAME, AARCH64_FL_##IDENT, \ -+ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ -+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ -+ & ~AARCH64_FL_##IDENT, \ -+ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, - #include "config/aarch64/aarch64-option-extensions.def" - {NULL, 0, 0, 0, false} - }; -@@ -225,9 +229,12 @@ static const struct aarch64_option_extension all_extensions[] = - bits and extension turned on. 
Cached for efficiency. */ - static struct aarch64_option_extension all_extensions_by_on[] = - { --#define AARCH64_OPT_EXTENSION(NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ -- SYNTHETIC, Z) \ -- {NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, SYNTHETIC}, -+#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ -+ {NAME, AARCH64_FL_##IDENT, \ -+ feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ -+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ -+ & ~AARCH64_FL_##IDENT, \ -+ AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, - #include "config/aarch64/aarch64-option-extensions.def" - {NULL, 0, 0, 0, false} - }; -@@ -250,18 +257,18 @@ struct arch_to_arch_name - the default set of architectural feature flags they support. */ - static const struct processor_name_to_arch all_cores[] = - { --#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \ -- {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS}, -+#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \ -+ {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT}, - #include "config/aarch64/aarch64-cores.def" -- {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A}, -+ {"generic", AARCH64_ARCH_V8A, feature_deps::V8A ().enable}, - {"", aarch64_no_arch, 0} - }; - - /* Map architecture revisions to their string representation. 
*/ - static const struct arch_to_arch_name all_architectures[] = - { --#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH, FLAGS) \ -- {AARCH64_ARCH_##ARCH_IDENT, NAME, FLAGS}, -+#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \ -+ {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable}, - #include "config/aarch64/aarch64-arches.def" - {aarch64_no_arch, "", 0} - }; -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index ece96e22a..9f8246618 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -30,19 +30,19 @@ - Due to the assumptions about the positions of these fields in config.gcc, - NAME should be kept as the first argument. */ - --AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A) --AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A) --AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A) --AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A) --AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A) --AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A) --AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A) --AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A) --AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A) --AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R) --AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A) --AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A) --AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A) --AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A) -+AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) -+AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) -+AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) -+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) -+AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, 
DOTPROD, FLAGM)) -+AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) -+AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) -+AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, (V8_6A, LS64)) -+AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, (V8_7A, MOPS)) -+AARCH64_ARCH("armv8-r", generic, V8R , 8, (V8_4A)) -+AARCH64_ARCH("armv9-a", generic, V9A , 9, (V8_5A, SVE2)) -+AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, (V8_6A, V9A)) -+AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, (V8_7A, V9_1A)) -+AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, (V8_8A, V9_2A)) - - #undef AARCH64_ARCH -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index cf500d0a9..60299160b 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -46,132 +46,132 @@ - /* ARMv8-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) --AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) --AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) --AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) --AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1) --AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1) -+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, (CRC), cortexa35, 0x41, 0xd02, -1) -+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, (CRC), cortexa35, 0x41, 0xd04, -1) -+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, (CRC), cortexa53, 0x41, 0xd03, -1) -+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, (CRC), cortexa57, 0x41, 0xd07, -1) -+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, (CRC), cortexa72, 0x41, 0xd08, -1) -+AARCH64_CORE("cortex-a73", 
cortexa73, cortexa57, V8A, (CRC), cortexa73, 0x41, 0xd09, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a0, -1) - /* Do not swap around "thunderxt88p1" and "thunderxt88", - this order is required to handle variant correctly. */ --AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0) --AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1) -+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, (CRC, CRYPTO), thunderxt88, 0x43, 0x0a1, 0) -+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, (CRC, CRYPTO), thunderxt88, 0x43, 0x0a1, -1) - - /* OcteonTX is the official name for T81/T83. */ --AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1) --AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a0, -1) -+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1) - --AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1) --AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1) -+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a2, -1) -+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), 
thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. */ --AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. */ --AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3) -+AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3) - - /* APM ('P') cores. */ --AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1) -+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, (), xgene1, 0x50, 0x000, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) --AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("falkor", falkor, falkor, V8A, (CRC, CRYPTO, RDMA), qdf24xx, 0x51, 0xC00, -1) -+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, (CRC, CRYPTO, RDMA), qdf24xx, 0x51, 0xC00, -1) - - /* Samsung ('S') cores. */ --AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1) -+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, (CRC, CRYPTO), exynosm1, 0x53, 0x001, -1) - - /* HXT ('h') cores. */ --AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1) -+AARCH64_CORE("phecda", phecda, falkor, V8A, (CRC, CRYPTO), qdf24xx, 0x68, 0x000, -1) - - /* ARMv8.1-A Architecture Processors. */ - - /* Broadcom ('B') cores. 
*/ --AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) --AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x42, 0x516, -1) -+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x42, 0x516, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1) -+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, (CRYPTO), thunderx2t99, 0x43, 0x0af, -1) - - /* ARMv8.2-A Architecture Processors. */ - - /* ARM ('A') cores. */ --AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) --AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) --AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) --AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) --AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) --AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) --AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) --AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | 
AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1) --AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) --AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) --AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) --AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) --AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) -+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), cortexa53, 0x41, 0xd05, -1) -+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, (F16, RCPC, DOTPROD), cortexa73, 0x41, 0xd0a, -1) -+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, (F16, RCPC, DOTPROD), neoversen1, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS), neoversen1, 0x41, 0xd0e, -1) -+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS), neoversen1, 0x41, 0xd0d, -1) -+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd41, -1) -+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd42, -1) -+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE, FLAGM, 
PAUTH), neoversen1, 0x41, 0xd4b, -1) -+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd06, -1) -+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd43, -1) -+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, (F16, RCPC, DOTPROD, SSBS, PROFILE), neoversen1, 0x41, 0xd44, -1) -+AARCH64_CORE("ares", ares, cortexa57, V8_2A, (F16, RCPC, DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, (F16, RCPC, DOTPROD, PROFILE), neoversen1, 0x41, 0xd0c, -1) -+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, (F16, RCPC, DOTPROD, SSBS), cortexa73, 0x41, 0xd4a, -1) - - /* Cavium ('C') cores. */ --AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1) --AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1) --AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b0, -1) -+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b1, -1) -+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b2, -1) - /* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. 
*/ --AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1) --AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1) --AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1) --AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1) -+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b2, -1) -+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b3, -1) -+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b4, -1) -+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFILE), cortexa57, 0x43, 0x0b5, -1) - - /* Fujitsu ('F') cores. */ --AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1) -+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) - - /* HiSilicon ('H') cores. */ --AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) - - /* ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). */ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. 
*/ --AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. 
*/ - --AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) --AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) --AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) --AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) -+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, (CRC), cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) -+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, (CRC), cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1) -+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, (CRC), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1) -+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, (CRC), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1) - - /* ARM DynamIQ big.LITTLE configurations. */ - --AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) --AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) -+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1) -+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, (F16, RCPC, DOTPROD), neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1) - - /* Armv8-R Architecture Processors. 
*/ --AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1) -+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15, -1) - - /* Armv9.0-A Architecture Processors. */ - - /* Arm ('A') cores. */ --AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1) -+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), cortexa53, 0x41, 0xd46, -1) - --AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1) -+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd47, -1) - --AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) -+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1) - --AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) -+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1) - --AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) --AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) -+AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) 
-+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) - - #undef AARCH64_CORE -diff --git a/gcc/config/aarch64/aarch64-feature-deps.h b/gcc/config/aarch64/aarch64-feature-deps.h -new file mode 100644 -index 000000000..3e33cb2ce ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-feature-deps.h -@@ -0,0 +1,121 @@ -+/* Feature dependency helpers for AArch64. -+ Copyright (C) 2022 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#ifndef AARCH64_FEATURE_DEPS_H -+#define AARCH64_FEATURE_DEPS_H 1 -+ -+namespace { -+namespace feature_deps { -+ -+/* Together, these definitions of get_flags take a list of -+ feature names (representing functions that are defined below) -+ and return the set of associated flags. */ -+constexpr aarch64_feature_flags get_flags () { return 0; } -+ -+template -+constexpr aarch64_feature_flags -+get_flags (T1 i, Ts... args) -+{ -+ return i ().flag | get_flags (args...); -+} -+ -+/* Like get_flags, but return the transitive closure of those features -+ and the ones that they rely on. */ -+constexpr aarch64_feature_flags get_enable () { return 0; } -+ -+template -+constexpr aarch64_feature_flags -+get_enable (T1 i, Ts... 
args) -+{ -+ return i ().enable | get_enable (args...); -+} -+ -+/* Define info such that it has the following static constant -+ variables: -+ -+ - flag: the aarch64_feature_flags bit associated with FEATURE -+ -+ - enable: the transitive closure of the features that FEATURE requires, -+ plus FLAG itself -+ -+ - explicit_on: the transitive closure of the features that an -+ explicit +FEATURE enables, including FLAG itself. This is -+ always a superset of ENABLE -+ -+ Also define a function FEATURE () that returns an info -+ (which is an empty structure, since all members are static). -+ -+ Building up the list feature-by-feature ensures that the definition -+ files are in topological order. */ -+template struct info; -+ -+#define HANDLE(IDENT, REQUIRES, EXPLICIT_ON) \ -+ template<> struct info { \ -+ static constexpr auto flag = AARCH64_FL_##IDENT; \ -+ static constexpr auto enable = flag | get_enable REQUIRES; \ -+ static constexpr auto explicit_on = enable | get_enable EXPLICIT_ON; \ -+ }; \ -+ constexpr info IDENT () \ -+ { \ -+ return info (); \ -+ } -+#define AARCH64_OPT_EXTENSION(A, IDENT, REQUIRES, EXPLICIT_ON, E, F) \ -+ HANDLE (IDENT, REQUIRES, EXPLICIT_ON) -+#define AARCH64_ARCH(A, B, IDENT, D, REQUIRES) HANDLE (IDENT, REQUIRES, ()) -+#include "config/aarch64/aarch64-option-extensions.def" -+#include "config/aarch64/aarch64-arches.def" -+#undef HANDLE -+ -+/* Return the set of all features that would need to be disabled if -+ the features in MASK are disabled. -+ -+ Note that the size of the expression varies linearly with the number -+ of features, which means that invoking this function once per feature -+ is quadratic in the number of features. However, collecting the same -+ information at compiler start-up is likely to be quadratic too, so -+ we're better off paying the cost once per compiler build rather than -+ once per compiler run. 
*/ -+constexpr aarch64_feature_flags -+get_flags_off (aarch64_feature_flags mask) -+{ -+ return (0 -+#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) \ -+ | (feature_deps::IDENT ().enable & mask ? AARCH64_FL_##IDENT : 0) -+#include "config/aarch64/aarch64-option-extensions.def" -+ ); -+} -+ -+/* Define root_off_ variables for each feature, giving the set of -+ features that must be turned off by +noIDENT. This set is not transitively -+ closed; use get_flags_off to complete the closure. */ -+#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, EXPLICIT_OFF, F) \ -+ constexpr auto root_off_##IDENT \ -+ = AARCH64_FL_##IDENT | get_flags EXPLICIT_OFF; -+#include "config/aarch64/aarch64-option-extensions.def" -+ -+/* Define cpu_ variables for each CPU, giving the transitive -+ closure of all the features that the CPU supports. */ -+#define AARCH64_CORE(A, CORE_IDENT, C, ARCH_IDENT, FEATURES, F, G, H, I) \ -+ constexpr auto cpu_##CORE_IDENT = ARCH_IDENT ().enable | get_enable FEATURES; -+#include "config/aarch64/aarch64-cores.def" -+ -+} -+} -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index df2c8d19b..bdf4baf30 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -21,23 +21,34 @@ - - Before using #include to read this file, define a macro: - -- AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, -- SYNTHETIC, FEATURE_STRING) -- -- - EXT_NAME is the name of the extension, represented as a string constant. -- - FLAGS_CANONICAL is the canonical internal name for this flag. -- - FLAGS_ON are the bitwise-or of the features that enabling the extension -- adds, or zero if enabling this extension has no effect on other features. -- - FLAGS_OFF are the bitwise-or of the features that disabling the extension -- removes, or zero if disabling this extension has no effect on other -- features. 
-- - SYNTHETIC is a boolean to indicate whether the option is a purely synthetic -- grouping of options and that the option itself has no feature bit (e.g. -- crypto). This is used to determine when sum of the individual options in -- FLAGS_ON can be replaced by FLAG_CANONICAL in options minimization. If the -- group is synthetic then they can be replaced when all options in FLAGS_ON -- are enabled, otherwise they can only be replaced when -- FLAGS_ON | FLAG_CANONICAL are enabled. -+ AARCH64_OPT_EXTENSION(NAME, IDENT, REQUIRES, EXPLICIT_ON, -+ EXPLICIT_OFF, FEATURE_STRING) -+ -+ - NAME is the name of the extension, represented as a string constant. -+ -+ - IDENT is the canonical internal name for this flag. -+ -+ - REQUIRES is a list of features that must be enabled whenever this -+ feature is enabled. The relationship is implicitly transitive: -+ if A appears in B's REQUIRES and B appears in C's REQUIRES then -+ A and B must be enabled whenever C is. Thus, turning on C also -+ turns on A and B, while turning off A or B also turns off C. -+ -+ - EXPLICIT_ON is a list of features that are enabled by an explicit -+ +NAME specification, in addition to those listed in REQUIRES. -+ Usually this is an empty list; comments below explain the exceptions. -+ The list is implicitly transitively closed wrt REQUIRES (but *not* -+ to EXPLICIT_ON, since NAME is the only thing explicit in +NAME). -+ Thus if A is in B's REQUIRES and B is in C's EXPLICIT_ON, +C will -+ enable both B and A. B's EXPLICIT_ON has no effect on +C. -+ -+ - EXPLICIT_OFF is a list of features that are disabled by an explicit -+ +noNAME specification, in addition to the features that are transitively -+ dependent on NAME (according to REQUIRES). As with EXPLICIT_ON, -+ this is usually an empty list; comments below explain the exceptions. -+ If a feature A appears in this list then the list implicitly includes -+ any features that are transitively dependent on A (according to REQUIRES). 
-+ - - FEAT_STRING is a string containing the entries in the 'Features' field of - /proc/cpuinfo on a GNU/Linux system that correspond to this architecture - extension being available. Sometimes multiple entries are needed to enable -@@ -47,197 +58,95 @@ - that are required. Their order is not important. An empty string means - do not detect this feature during auto detection. - -- NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in -- config.gcc. */ -- --/* Enabling "fp" just enables "fp". -- Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", -- "sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", -- "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */ --AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \ -- AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \ -- AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \ -- AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ -- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ -- AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \ -- AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16, -- false, "fp") -- --/* Enabling "simd" also enables "fp". -- Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3", -- "sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", -- "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */ --AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ -- AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \ -- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -- AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \ -- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \ -- AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ -- false, "asimd") -- --/* Enabling or disabling "crc" only changes "crc". */ --AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") -- --/* Enabling or disabling "lse" only changes "lse". 
*/ --AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, false, "atomics") -- --/* Enabling "fp16" also enables "fp". -- Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2", -- "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and -- "f64mm". */ --AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \ -- AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \ -- AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \ -- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \ -- "fphp asimdhp") -- --/* Enabling or disabling "rcpc" only changes "rcpc". */ --AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, false, "lrcpc") -- --/* Enabling "rdma" also enables "fp", "simd". -- Disabling "rdma" just disables "rdma". */ --AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ -- AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") -- --/* Enabling "dotprod" also enables "simd". -- Disabling "dotprod" only disables "dotprod". */ --AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \ -- false, "asimddp") -- --/* Enabling "aes" also enables "simd". -- Disabling "aes" disables "aes" and "sve2-aes'. */ --AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \ -- AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes") -- --/* Enabling "sha2" also enables "simd". -- Disabling "sha2" just disables "sha2". */ --AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \ -- AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \ -- AARCH64_FL_SVE2_SHA3, false, "sha1 sha2") -- --/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". -- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", -- "sve2-aes", "sve2-sha3", "sve2-sm4". 
*/ --AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ -- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ -- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ -- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ -+ The list of features must follow topological order wrt REQUIRES -+ and EXPLICIT_ON. For example, if A is in B's REQUIRES list, A must -+ come before B. This is enforced by aarch64-feature-deps.h. -+ -+ NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in -+ config.gcc. */ -+ -+AARCH64_OPT_EXTENSION("fp", FP, (), (), (), "fp") -+ -+AARCH64_OPT_EXTENSION("simd", SIMD, (FP), (), (), "asimd") -+ -+AARCH64_OPT_EXTENSION("crc", CRC, (), (), (), "crc32") -+ -+AARCH64_OPT_EXTENSION("lse", LSE, (), (), (), "atomics") -+ -+/* +nofp16 disables an implicit F16FML, even though an implicit F16FML -+ does not imply F16. See F16FML for more details. */ -+AARCH64_OPT_EXTENSION("fp16", F16, (FP), (), (F16FML), "fphp asimdhp") -+ -+AARCH64_OPT_EXTENSION("rcpc", RCPC, (), (), (), "lrcpc") -+ -+/* An explicit +rdma implies +simd, but +rdma+nosimd still enables scalar -+ RDMA instructions. */ -+AARCH64_OPT_EXTENSION("rdma", RDMA, (), (SIMD), (), "asimdrdm") -+ -+AARCH64_OPT_EXTENSION("dotprod", DOTPROD, (SIMD), (), (), "asimddp") -+ -+AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes") -+ -+AARCH64_OPT_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2") -+ -+/* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them -+ (such as SHA3 and the SVE2 crypto extensions). */ -+AARCH64_OPT_EXTENSION("crypto", CRYPTO, (AES, SHA2), (), (AES, SHA2, SM4), - "aes pmull sha1 sha2") - --/* Enabling "sha3" enables "simd" and "sha2". -- Disabling "sha3" disables "sha3" and "sve2-sha3". */ --AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \ -- AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ -- "sha3 sha512") -- --/* Enabling "sm4" also enables "simd". 
-- Disabling "sm4" disables "sm4" and "sve2-sm4". */ --AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \ -- AARCH64_FL_SVE2_SM4, false, "sm3 sm4") -- --/* Enabling "fp16fml" also enables "fp" and "fp16". -- Disabling "fp16fml" just disables "fp16fml". */ --AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, \ -- AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") -- --/* Enabling "sve" also enables "fp16", "fp" and "simd". -- Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes", -- "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */ --AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ -- AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \ -- AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ -- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ -- AARCH64_FL_SVE2_BITPERM, false, "sve") -- --/* Enabling/Disabling "profile" does not enable/disable any other feature. */ --AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") -- --/* Enabling/Disabling "rng" only changes "rng". */ --AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng") -- --/* Enabling/Disabling "memtag" only changes "memtag". */ --AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") -- --/* Enabling/Disabling "sb" only changes "sb". */ --AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "sb") -- --/* Enabling/Disabling "ssbs" only changes "ssbs". */ --AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "ssbs") -- --/* Enabling/Disabling "predres" only changes "predres". */ --AARCH64_OPT_EXTENSION("predres", AARCH64_FL_PREDRES, 0, 0, false, "") -- --/* Enabling "sve2" also enables "sve", "fp16", "fp", and "simd". -- Disabling "sve2" disables "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", and -- "sve2-bitperm". 
*/ --AARCH64_OPT_EXTENSION("sve2", AARCH64_FL_SVE2, AARCH64_FL_SVE | \ -- AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, \ -- AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -- AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, "sve2") -- --/* Enabling "sve2-sm4" also enables "sm4", "simd", "fp16", "fp", "sve", and -- "sve2". Disabling "sve2-sm4" just disables "sve2-sm4". */ --AARCH64_OPT_EXTENSION("sve2-sm4", AARCH64_FL_SVE2_SM4, AARCH64_FL_SM4 | \ -- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesm4") -- --/* Enabling "sve2-aes" also enables "aes", "simd", "fp16", "fp", "sve", and -- "sve2". Disabling "sve2-aes" just disables "sve2-aes". */ --AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ -- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "sveaes") -- --/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and -- "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ --AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ -- AARCH64_FL_SHA2 | \ -- AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -- AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") -- --/* Enabling "sve2-bitperm" also enables "simd", "fp16", "fp", "sve", and -- "sve2". Disabling "sve2-bitperm" just disables "sve2-bitperm". */ --AARCH64_OPT_EXTENSION("sve2-bitperm", AARCH64_FL_SVE2_BITPERM, AARCH64_FL_SIMD | \ -- AARCH64_FL_F16 | AARCH64_FL_FP | AARCH64_FL_SVE | \ -- AARCH64_FL_SVE2, 0, false, "svebitperm") -- --/* Enabling or disabling "tme" only changes "tme". */ --AARCH64_OPT_EXTENSION("tme", AARCH64_FL_TME, 0, 0, false, "") -- --/* Enabling "i8mm" also enables "simd" and "fp". -- Disabling "i8mm" only disables "i8mm". */ --AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \ -- AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm") -- --/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd". 
-- Disabling "f32mm" only disables "f32mm". */ --AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \ -- AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ -- AARCH64_FL_SIMD, 0, false, "f32mm") -- --/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd". -- Disabling "f64mm" only disables "f64mm". */ --AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \ -- AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ -- AARCH64_FL_SIMD, 0, false, "f64mm") -- --/* Enabling "bf16" also enables "simd" and "fp". -- Disabling "bf16" only disables "bf16". */ --AARCH64_OPT_EXTENSION("bf16", AARCH64_FL_BF16, \ -- AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "bf16") -- --/* Enabling/Disabling "flagm" only changes "flagm". */ --AARCH64_OPT_EXTENSION("flagm", AARCH64_FL_FLAGM, 0, 0, false, "flagm") -- --/* Enabling/Disabling "pauth" only changes "pauth". */ --AARCH64_OPT_EXTENSION("pauth", AARCH64_FL_PAUTH, 0, 0, false, "paca pacg") -- --/* Enabling/Disabling "ls64" only changes "ls64". */ --AARCH64_OPT_EXTENSION("ls64", AARCH64_FL_LS64, 0, 0, false, "") -- --/* Enabling/disabling "mops" only changes "mops". */ --AARCH64_OPT_EXTENSION("mops", AARCH64_FL_MOPS, 0, 0, false, "") -+AARCH64_OPT_EXTENSION("sha3", SHA3, (SHA2), (), (), "sha3 sha512") -+ -+AARCH64_OPT_EXTENSION("sm4", SM4, (SIMD), (), (), "sm3 sm4") -+ -+/* An explicit +fp16fml implies +fp16, but a dependence on it does not. -+ Thus -march=armv8.4-a implies F16FML but not F16. -march=armv8.4-a+fp16 -+ and -march=armv8.4-a+fp16fml are equivalent and enable both F16FML and F16. -+ -march=armv8.4-a+nofp16+fp16 enables F16 but not F16FML. 
*/ -+AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") -+ -+AARCH64_OPT_EXTENSION("sve", SVE, (SIMD, F16), (), (), "sve") -+ -+AARCH64_OPT_EXTENSION("profile", PROFILE, (), (), (), "") -+ -+AARCH64_OPT_EXTENSION("rng", RNG, (), (), (), "rng") -+ -+AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") -+ -+AARCH64_OPT_EXTENSION("sb", SB, (), (), (), "sb") -+ -+AARCH64_OPT_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") -+ -+AARCH64_OPT_EXTENSION("predres", PREDRES, (), (), (), "") -+ -+AARCH64_OPT_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2") -+ -+AARCH64_OPT_EXTENSION("sve2-sm4", SVE2_SM4, (SVE2, SM4), (), (), "svesm4") -+ -+AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes") -+ -+AARCH64_OPT_EXTENSION("sve2-sha3", SVE2_SHA3, (SVE2, SHA3), (), (), "svesha3") -+ -+AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (), -+ "svebitperm") -+ -+AARCH64_OPT_EXTENSION("tme", TME, (), (), (), "") -+ -+AARCH64_OPT_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") -+ -+AARCH64_OPT_EXTENSION("f32mm", F32MM, (SVE), (), (), "f32mm") -+ -+AARCH64_OPT_EXTENSION("f64mm", F64MM, (SVE), (), (), "f64mm") -+ -+/* An explicit +bf16 implies +simd, but +bf16+nosimd still enables scalar BF16 -+ instructions. 
*/ -+AARCH64_OPT_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") -+ -+AARCH64_OPT_EXTENSION("flagm", FLAGM, (), (), (), "flagm") -+ -+AARCH64_OPT_EXTENSION("pauth", PAUTH, (), (), (), "paca pacg") -+ -+AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") -+ -+AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") - - #undef AARCH64_OPT_EXTENSION -diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h -index 421648a15..ba23c90c4 100644 ---- a/gcc/config/aarch64/aarch64-opts.h -+++ b/gcc/config/aarch64/aarch64-opts.h -@@ -22,6 +22,10 @@ - #ifndef GCC_AARCH64_OPTS_H - #define GCC_AARCH64_OPTS_H - -+#ifndef USED_FOR_TARGET -+typedef uint64_t aarch64_feature_flags; -+#endif -+ - /* The various cores that implement AArch64. */ - enum aarch64_processor - { -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 12d9beee4..c06e99339 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -701,9 +701,8 @@ check_required_extensions (location_t location, tree fndecl, - return check_required_registers (location, fndecl); - - static const struct { uint64_t flag; const char *name; } extensions[] = { --#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ -- SYNTHETIC, FEATURE_STRING) \ -- { FLAG_CANONICAL, EXT_NAME }, -+#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \ -+ { AARCH64_FL_##IDENT, EXT_NAME }, - #include "aarch64-option-extensions.def" - }; - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index f975aad07..1363873b1 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -81,6 +81,7 @@ - #include "rtlanal.h" - #include "tree-dfa.h" - #include "asan.h" -+#include "aarch64-feature-deps.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -2935,8 +2936,9 @@ struct processor - /* Architectures implementing AArch64. 
*/ - static const struct processor all_architectures[] = - { --#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ -- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL}, -+#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \ -+ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \ -+ feature_deps::ARCH_IDENT ().enable, NULL}, - #include "aarch64-arches.def" - {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; -@@ -2944,12 +2946,12 @@ static const struct processor all_architectures[] = - /* Processor cores implementing AArch64. */ - static const struct processor all_cores[] = - { --#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ -- {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ -- AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings}, -+#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, E, COSTS, G, H, I) \ -+ {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \ -+ feature_deps::cpu_##IDENT, &COSTS##_tunings}, - #include "aarch64-cores.def" - {"generic", generic, cortexa53, AARCH64_ARCH_V8A, -- AARCH64_FL_FOR_V8A, &generic_tunings}, -+ feature_deps::V8A ().enable, &generic_tunings}, - {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL} - }; - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index f4e0cd148..50a2ef444 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -144,149 +144,27 @@ - - #define PCC_BITFIELD_TYPE_MATTERS 1 - --/* Instruction tuning/selection flags. */ -- --/* Bit values used to identify processor capabilities. */ --#define AARCH64_FL_SIMD (1 << 0) /* Has SIMD instructions. */ --#define AARCH64_FL_FP (1 << 1) /* Has FP. */ --#define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ --#define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ --/* ARMv8.1-A architecture extensions. */ --#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ --#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. 
*/ --#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */ --/* Armv8-R. */ --#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */ --/* ARMv8.2-A architecture extensions. */ --#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */ --#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */ --#define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */ --/* ARMv8.3-A architecture extensions. */ --#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */ --#define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */ --#define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */ --/* New flags to split crypto into aes and sha2. */ --#define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */ --#define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */ --/* ARMv8.4-A architecture extensions. */ --#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */ --#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */ --#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */ --#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */ -- --/* Statistical Profiling extensions. */ --#define AARCH64_FL_PROFILE (1 << 21) -- --/* ARMv8.5-A architecture extensions. */ --#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */ --#define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */ --#define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging -- Extensions. */ -- --/* Speculation Barrier instruction supported. */ --#define AARCH64_FL_SB (1 << 25) -- --/* Speculative Store Bypass Safe instruction supported. */ --#define AARCH64_FL_SSBS (1 << 26) -- --/* Execution and Data Prediction Restriction instructions supported. */ --#define AARCH64_FL_PREDRES (1 << 27) -- --/* SVE2 instruction supported. 
*/ --#define AARCH64_FL_SVE2 (1 << 28) --#define AARCH64_FL_SVE2_AES (1 << 29) --#define AARCH64_FL_SVE2_SM4 (1 << 30) --#define AARCH64_FL_SVE2_SHA3 (1ULL << 31) --#define AARCH64_FL_SVE2_BITPERM (1ULL << 32) -- --/* Transactional Memory Extension. */ --#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ -- --/* Armv8.6-A architecture extensions. */ --#define AARCH64_FL_V8_6A (1ULL << 34) -- --/* 8-bit Integer Matrix Multiply (I8MM) extensions. */ --#define AARCH64_FL_I8MM (1ULL << 35) -- --/* Brain half-precision floating-point (BFloat16) Extension. */ --#define AARCH64_FL_BF16 (1ULL << 36) -- --/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */ --#define AARCH64_FL_F32MM (1ULL << 37) -- --/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */ --#define AARCH64_FL_F64MM (1ULL << 38) -- --/* Flag Manipulation Instructions (FLAGM) extension. */ --#define AARCH64_FL_FLAGM (1ULL << 39) -- --/* Pointer Authentication (PAUTH) extension. */ --#define AARCH64_FL_PAUTH (1ULL << 40) -- --/* Armv9.0-A. */ --#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */ -- --/* 64-byte atomic load/store extensions. */ --#define AARCH64_FL_LS64 (1ULL << 42) -- --/* Armv8.7-a architecture extensions. */ --#define AARCH64_FL_V8_7A (1ULL << 43) -- --/* Hardware memory operation instructions. */ --#define AARCH64_FL_MOPS (1ULL << 44) -- --/* Armv8.8-a architecture extensions. */ --#define AARCH64_FL_V8_8A (1ULL << 45) -- --/* Armv9.1-A. */ --#define AARCH64_FL_V9_1A (1ULL << 46) -- --/* Armv9.2-A. */ --#define AARCH64_FL_V9_2A (1ULL << 47) -- --/* Armv9.3-A. */ --#define AARCH64_FL_V9_3A (1ULL << 48) -- --/* Has FP and SIMD. */ --#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) -- --/* Has FP without SIMD. */ --#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD) -- --/* Architecture flags that effect instruction selection. 
*/ --#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD) --#define AARCH64_FL_FOR_V8_1A \ -- (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \ -- | AARCH64_FL_RDMA | AARCH64_FL_V8_1A) --#define AARCH64_FL_FOR_V8_2A \ -- (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A) --#define AARCH64_FL_FOR_V8_3A \ -- (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH) --#define AARCH64_FL_FOR_V8_4A \ -- (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \ -- | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM) --#define AARCH64_FL_FOR_V8_5A \ -- (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \ -- | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) --#define AARCH64_FL_FOR_V8_6A \ -- (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \ -- | AARCH64_FL_I8MM | AARCH64_FL_BF16) --#define AARCH64_FL_FOR_V8_7A \ -- (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64) --#define AARCH64_FL_FOR_V8_8A \ -- (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | AARCH64_FL_MOPS) -- --#define AARCH64_FL_FOR_V8R \ -- (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R) --#define AARCH64_FL_FOR_V9A \ -- (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \ -- | AARCH64_FL_F16) --#define AARCH64_FL_FOR_V9_1A \ -- (AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A) --#define AARCH64_FL_FOR_V9_2A \ -- (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A) --#define AARCH64_FL_FOR_V9_3A \ -- (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A) -+#ifndef USED_FOR_TARGET -+ -+/* Define an enum of all features (architectures and extensions). */ -+enum class aarch64_feature : unsigned char { -+#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) IDENT, -+#define AARCH64_ARCH(A, B, IDENT, D, E) IDENT, -+#include "aarch64-option-extensions.def" -+#include "aarch64-arches.def" -+}; -+ -+/* Define unique flags for each of the above. 
*/ -+#define HANDLE(IDENT) \ -+ constexpr auto AARCH64_FL_##IDENT \ -+ = aarch64_feature_flags (1) << int (aarch64_feature::IDENT); -+#define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) HANDLE (IDENT) -+#define AARCH64_ARCH(A, B, IDENT, D, E) HANDLE (IDENT) -+#include "aarch64-option-extensions.def" -+#include "aarch64-arches.def" -+#undef HANDLE -+ -+#endif - - /* Macros to test ISA flags. */ - -diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc -index 97690de62..ddfc9451f 100644 ---- a/gcc/config/aarch64/driver-aarch64.cc -+++ b/gcc/config/aarch64/driver-aarch64.cc -@@ -26,6 +26,7 @@ - #include "coretypes.h" - #include "tm.h" - #include "aarch64-protos.h" -+#include "aarch64-feature-deps.h" - - struct aarch64_arch_extension - { -@@ -34,9 +35,8 @@ struct aarch64_arch_extension - const char *feat_string; - }; - --#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ -- SYNTHETIC, FEATURE_STRING) \ -- { EXT_NAME, FLAG_CANONICAL, FEATURE_STRING }, -+#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, FEATURE_STRING) \ -+ { EXT_NAME, AARCH64_FL_##IDENT, FEATURE_STRING }, - static struct aarch64_arch_extension aarch64_extensions[] = - { - #include "aarch64-option-extensions.def" -@@ -62,7 +62,7 @@ struct aarch64_core_data - #define DEFAULT_ARCH "8A" - - #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ -- { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS }, -+ { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT }, - - static struct aarch64_core_data aarch64_cpu_data[] = - { -@@ -80,7 +80,7 @@ struct aarch64_arch_driver_info - - /* Skip the leading "V" in the architecture name. 
*/ - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ -- { #ARCH_IDENT + 1, NAME, FLAGS }, -+ { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable }, - - static struct aarch64_arch_driver_info aarch64_arches[] = - { --- -2.33.0 - diff --git a/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch b/0118-aarch64-Simplify-probe-of-final-frame-allocation.patch similarity index 87% rename from 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch rename to 0118-aarch64-Simplify-probe-of-final-frame-allocation.patch index 70cd64b..1441353 100644 --- a/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch +++ b/0118-aarch64-Simplify-probe-of-final-frame-allocation.patch @@ -1,10 +1,7 @@ -From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001 +From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:11 +0100 -Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final - frame allocation - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae +Date: Tue, 12 Sep 2023 16:08:55 +0100 +Subject: [PATCH] aarch64: Simplify probe of final frame allocation Previous patches ensured that the final frame allocation only needs a probe when the size is strictly greater than 1KiB. It's therefore @@ -31,10 +28,10 @@ gcc/testsuite/ 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 1ba4c2f89..6d835dc8f 100644 +index 16433fb70f4f..8abf3d7a1e2b 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, are still safe. 
*/ if (residual) { @@ -53,7 +50,7 @@ index 1ba4c2f89..6d835dc8f 100644 aarch64_sub_sp (temp1, temp2, residual, frame_related_p); if (residual >= min_probe_threshold) -@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, +@@ -9543,8 +9539,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required." "\n", residual); @@ -65,7 +62,7 @@ index 1ba4c2f89..6d835dc8f 100644 } } diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c -index 0d8a25d73..f0ec13897 100644 +index 0d8a25d73a24..f0ec1389771d 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c @@ -33,7 +33,7 @@ int test1(int z) { @@ -78,7 +75,7 @@ index 0d8a25d73..f0ec13897 100644 ** bl g ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c -index 82447d20f..6383bec5e 100644 +index 82447d20fff5..6383bec5ebcd 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c @@ -9,7 +9,7 @@ void g(); @@ -100,7 +97,7 @@ index 82447d20f..6383bec5e 100644 ** bl g ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c -index 73ac3e4e4..562039b5e 100644 +index 73ac3e4e4eb0..562039b5e9b8 100644 --- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c +++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c @@ -9,7 +9,7 @@ void g(); @@ -122,5 +119,5 @@ index 73ac3e4e4..562039b5e 100644 ** bl g ** ... 
-- -2.33.0 +2.43.5 diff --git a/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch deleted file mode 100644 index fb3f7a8..0000000 --- a/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch +++ /dev/null @@ -1,467 +0,0 @@ -From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:55 +0100 -Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch - strings - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d - -aarch64-common.cc has two arrays, one maintaining the original -definition order and one sorted by population count. Sorting -by population count was a way of ensuring topological ordering, -taking advantage of the fact that the entries are partially -ordered by the subset relation. However, the sorting is not -needed now that the .def file is forced to have topological -order from the outset. - -Other changes are: - -(1) The population count used: - - uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; - uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; - int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); - int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); - - where I think the & was supposed to be |. This meant that the - counts would always be 1 in practice, since flag_canonical is - a single bit. This led us to printing +nofp+nosimd even though - GCC "knows" (and GAS agrees) that +nofp disables simd. - -(2) The .arch output code converts +aes+sha2 to +crypto. I think - the main reason for doing this is to support assemblers that - predate the individual per-feature crypto flags. It therefore - seems more natural to treat it as a special case, rather than - as an instance of a general pattern. Hopefully we won't do - something similar in future! 
- - (There is already special handling of CRC, for different reasons.) - -(3) Previously, if the /proc/cpuinfo code saw a feature like sve, - it would assume the presence of all the features that sve - depends on. It would be possible to keep that behaviour - if necessary, but it was simpler to assume the presence of - fp16 (say) only when fphp is present. There's an argument - that that's more conservatively correct too. - -gcc/ - * common/config/aarch64/aarch64-common.cc - (TARGET_OPTION_INIT_STRUCT): Delete. - (aarch64_option_extension): Remove is_synthetic_flag. - (all_extensions): Update accordingly. - (all_extensions_by_on, opt_ext, opt_ext_cmp): Delete. - (aarch64_option_init_struct, aarch64_contains_opt): Delete. - (aarch64_get_extension_string_for_isa_flags): Rewrite to use - all_extensions instead of all_extensions_on. - -gcc/testsuite/ - * gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve. - * gcc.target/aarch64/cpunative/info_9: Likewise svesm4. - * gcc.target/aarch64/cpunative/info_15: Likewise. - * gcc.target/aarch64/cpunative/info_16: Likewise sve2. - * gcc.target/aarch64/cpunative/info_17: Likewise. - * gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp - rather than +nofp+nosimd. - * gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise. - * gcc.target/aarch64/target_attr_15.c: Likewise. 
---- - gcc/common/config/aarch64/aarch64-common.cc | 244 ++++-------------- - .../gcc.target/aarch64/cpunative/info_15 | 2 +- - .../gcc.target/aarch64/cpunative/info_16 | 2 +- - .../gcc.target/aarch64/cpunative/info_17 | 2 +- - .../gcc.target/aarch64/cpunative/info_8 | 2 +- - .../gcc.target/aarch64/cpunative/info_9 | 2 +- - .../aarch64/cpunative/native_cpu_10.c | 2 +- - .../aarch64/cpunative/native_cpu_2.c | 2 +- - .../gcc.target/aarch64/target_attr_15.c | 2 +- - 9 files changed, 55 insertions(+), 205 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 74729bb30..057dc094d 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -42,8 +42,6 @@ - - #undef TARGET_OPTION_OPTIMIZATION_TABLE - #define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table --#undef TARGET_OPTION_INIT_STRUCT --#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct - - #define INVALID_IMP ((unsigned) -1) - -@@ -209,7 +207,6 @@ struct aarch64_option_extension - const uint64_t flag_canonical; - const uint64_t flags_on; - const uint64_t flags_off; -- const bool is_synthetic; - }; - - /* ISA extensions in AArch64. */ -@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] = - {NAME, AARCH64_FL_##IDENT, \ - feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ - feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ -- & ~AARCH64_FL_##IDENT, \ -- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, -+ & ~AARCH64_FL_##IDENT}, - #include "config/aarch64/aarch64-option-extensions.def" -- {NULL, 0, 0, 0, false} --}; -- --/* A copy of the ISA extensions list for AArch64 sorted by the popcount of -- bits and extension turned on. Cached for efficiency. 
*/ --static struct aarch64_option_extension all_extensions_by_on[] = --{ --#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ -- {NAME, AARCH64_FL_##IDENT, \ -- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ -- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ -- & ~AARCH64_FL_##IDENT, \ -- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO}, --#include "config/aarch64/aarch64-option-extensions.def" -- {NULL, 0, 0, 0, false} -+ {NULL, 0, 0, 0} - }; - - struct processor_name_to_arch -@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec *candidates) - candidates->safe_push (opt->name); - } - --/* Comparer to sort aarch64's feature extensions by population count. Largest -- first. */ -- --typedef const struct aarch64_option_extension opt_ext; -- --int opt_ext_cmp (const void* a, const void* b) --{ -- opt_ext *opt_a = (opt_ext *)a; -- opt_ext *opt_b = (opt_ext *)b; -- -- /* We consider the total set of bits an options turns on to be the union of -- the singleton set containing the option itself and the set of options it -- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and -- FL_SIMD. As such the set of bits represented by this option is -- {FL_DOTPROD, FL_SIMD}. */ -- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; -- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; -- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); -- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); -- int order = popcnt_b - popcnt_a; -- -- /* If they have the same amount of bits set, give it a more -- deterministic ordering by using the value of the bits themselves. */ -- if (order != 0) -- return order; -- -- if (total_flags_a != total_flags_b) -- return total_flags_a < total_flags_b ? 1 : -1; -- -- return 0; --} -- --/* Implement TARGET_OPTION_INIT_STRUCT. 
*/ -- --static void --aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) --{ -- /* Sort the extensions based on how many bits they set, order the larger -- counts first. We sort the list because this makes processing the -- feature bits O(n) instead of O(n^2). While n is small, the function -- to calculate the feature strings is called on every options push, -- pop and attribute change (arm_neon headers, lto etc all cause this to -- happen quite frequently). It is a trade-off between time and space and -- so time won. */ -- int n_extensions -- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension); -- qsort (&all_extensions_by_on, n_extensions, -- sizeof (struct aarch64_option_extension), opt_ext_cmp); --} -- --/* Checks to see if enough bits from the option OPT are enabled in -- ISA_FLAG_BITS to be able to replace the individual options with the -- canonicalized version of the option. This is done based on two rules: -- -- 1) Synthetic groups, such as +crypto we only care about the bits that are -- turned on. e.g. +aes+sha2 can be replaced with +crypto. -- -- 2) Options that themselves have a bit, such as +rdma, in this case, all the -- feature bits they turn on must be available and the bit for the option -- itself must be. In this case it's effectively a reduction rather than a -- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would -- need +rdma+fp+simd which is reduced down to +rdma. --*/ -- --static bool --aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) --{ -- uint64_t flags_check -- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical; -- -- return (isa_flag_bits & flags_check) == flags_check; --} -- - /* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS - gives the default set of flags which are implied by whatever -march - we'd put out. 
Our job is to figure out the minimal set of "+" and -@@ -436,118 +345,59 @@ std::string - aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, - uint64_t default_arch_flags) - { -- const struct aarch64_option_extension *opt = NULL; - std::string outstr = ""; - -- uint64_t isa_flag_bits = isa_flags; -- -- /* Pass one: Minimize the search space by reducing the set of options -- to the smallest set that still turns on the same features as before in -- conjunction with the bits that are turned on by default for the selected -- architecture. */ -- for (opt = all_extensions_by_on; opt->name != NULL; opt++) -+ aarch64_feature_flags current_flags = default_arch_flags; -+ -+ /* As a special case, do not assume that the assembler will enable CRC -+ even if it is the default for the architecture. This is required -+ because some CPUs had an incorrect specification in older assemblers: -+ even though CRC should be the default for these cases the -mcpu -+ values would not turn it on. -+ -+ However, assemblers with Armv8-R AArch64 support should not have this -+ issue, so we don't need this fix when targeting Armv8-R. */ -+ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R) -+ ? AARCH64_FL_CRC : 0); -+ -+ /* Add the features in isa_flags & ~current_flags using the smallest -+ possible number of extensions. We can do this by iterating over the -+ array in reverse order, since the array is sorted topologically. -+ But in order to make the output more readable, it seems better -+ to add the strings in definition order. */ -+ aarch64_feature_flags added = 0; -+ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; ) - { -- /* If the bit is on by default, then all the options it turns on are also -- on by default due to the transitive dependencies. -- -- If the option is enabled explicitly in the set then we need to emit -- an option for it. 
Since this list is sorted by extensions setting the -- largest number of featers first, we can be sure that nothing else will -- ever need to set the bits we already set. Consider the following -- situation: -- -- Feat1 = A + B + C -- Feat2 = A + B -- Feat3 = A + D -- Feat4 = B + C -- Feat5 = C -- -- The following results are expected: -- -- A + C = A + Feat5 -- B + C = Feat4 -- Feat4 + A = Feat1 -- Feat2 + Feat5 = Feat1 -- Feat1 + C = Feat1 -- Feat3 + Feat4 = Feat1 + D -- -- This search assumes that all invidual feature bits are use visible, -- in other words the user must be able to do +A, +B, +C and +D. */ -- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt)) -- { -- /* We remove all the dependent bits, to prevent them from being turned -- on twice. This only works because we assume that all there are -- individual options to set all bits standalone. */ -- -- /* PR target/94396. -- -- For flags which would already imply a bit that's on by default (e.g -- fp16fml which implies +fp,+fp16) we must emit the flags that are not -- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So -- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit -- +fp16. But if +fp16fml is used in an architecture where it is -- completely optional we only have to emit the canonical flag. */ -- uint64_t toggle_bits = opt->flags_on & default_arch_flags; -- /* Now check to see if the canonical flag is on by default. If it -- is not then enabling it will enable all bits in flags_on. */ -- if ((opt->flag_canonical & default_arch_flags) == 0) -- toggle_bits = opt->flags_on; -- -- isa_flag_bits &= ~toggle_bits; -- isa_flag_bits |= opt->flag_canonical; -- } -- } -+ auto &opt = all_extensions[i]; - -- /* By toggling bits on and off, we may have set bits on that are already -- enabled by default. So we mask the default set out so we don't emit an -- option for them. 
Instead of checking for this each time during Pass One -- we just mask all default bits away at the end. */ -- isa_flag_bits &= ~default_arch_flags; -- -- /* We now have the smallest set of features we need to process. A subsequent -- linear scan of the bits in isa_flag_bits will allow us to print the ext -- names. However as a special case if CRC was enabled before, always print -- it. This is required because some CPUs have an incorrect specification -- in older assemblers. Even though CRC should be the default for these -- cases the -mcpu values won't turn it on. -- -- Note that assemblers with Armv8-R AArch64 support should not have this -- issue, so we don't need this fix when targeting Armv8-R. */ -- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R) -- isa_flag_bits |= AARCH64_ISA_CRC; -- -- /* Pass Two: -- Print the option names that we're sure we must turn on. These are only -- optional extension names. Mandatory ones have already been removed and -- ones we explicitly want off have been too. */ -- for (opt = all_extensions_by_on; opt->name != NULL; opt++) -- { -- if (isa_flag_bits & opt->flag_canonical) -- { -- outstr += "+"; -- outstr += opt->name; -- } -- } -+ /* As a special case, emit +crypto rather than +aes+sha2, -+ in order to support assemblers that predate the separate -+ per-feature crypto flags. */ -+ auto flags = opt.flag_canonical; -+ if (flags == AARCH64_FL_CRYPTO) -+ flags = AARCH64_FL_AES | AARCH64_FL_SHA2; - -- /* Pass Three: -- Print out a +no for any mandatory extension that we are -- turning off. By this point aarch64_parse_extension would have ensured -- that any optional extensions are turned off. The only things left are -- things that can't be turned off usually, e.g. something that is on by -- default because it's mandatory and we want it off. For turning off bits -- we don't guarantee the smallest set of flags, but instead just emit all -- options the user has specified. 
-- -- The assembler requires all + to be printed before +no. */ -- for (opt = all_extensions_by_on; opt->name != NULL; opt++) -- { -- if ((~isa_flags) & opt->flag_canonical -- && !((~default_arch_flags) & opt->flag_canonical)) -+ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags) - { -- outstr += "+no"; -- outstr += opt->name; -+ current_flags |= opt.flag_canonical | opt.flags_on; -+ added |= opt.flag_canonical; - } - } -+ for (auto &opt : all_extensions) -+ if (added & opt.flag_canonical) -+ { -+ outstr += "+"; -+ outstr += opt.name; -+ } -+ -+ /* Remove the features in current_flags & ~isa_flags. */ -+ for (auto &opt : all_extensions) -+ if (opt.flag_canonical & current_flags & ~isa_flags) -+ { -+ current_flags &= ~(opt.flag_canonical | opt.flags_off); -+ outstr += "+no"; -+ outstr += opt.name; -+ } - - return outstr; - } -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 -index bc6453945..6b425ea20 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 -@@ -1,6 +1,6 @@ - processor : 0 - BogoMIPS : 100.00 --Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp -+Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4 - CPU implementer : 0x41 - CPU architecture: 8 - CPU variant : 0x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 -index 2c04ff19c..26f01c496 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 -@@ -1,6 +1,6 @@ - processor : 0 - BogoMIPS : 100.00 --Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 -+Features : fp asimd 
evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp - CPU implementer : 0xfe - CPU architecture: 8 - CPU variant : 0x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 -index 2c04ff19c..26f01c496 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 -@@ -1,6 +1,6 @@ - processor : 0 - BogoMIPS : 100.00 --Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 -+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp - CPU implementer : 0xfe - CPU architecture: 8 - CPU variant : 0x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 -index d6d9d03a2..76da16c57 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 -@@ -1,6 +1,6 @@ - processor : 0 - BogoMIPS : 100.00 --Features : asimd sve fp -+Features : asimd sve fp fphp asimdhp - CPU implementer : 0x41 - CPU architecture: 8 - CPU variant : 0x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 -index c9aa4a9a0..14703dd1d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 -@@ -1,6 +1,6 @@ - processor : 0 - BogoMIPS : 100.00 --Features : asimd fp svesm4 -+Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4 - CPU implementer : 0x41 - CPU architecture: 8 - CPU variant : 0x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c -index 6a753965c..ddb06b822 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch 
armv8-a\+nofp\+nosimd} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ - - /* Test one with no entry in feature list. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c -index aad71f434..edbdb5626 100644 ---- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c -@@ -7,6 +7,6 @@ int main() - return 0; - } - --/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */ -+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */ - - /* Test one where asimd is provided byt no fp. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c -index 108b372e4..069a00108 100644 ---- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c -+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c -@@ -10,4 +10,4 @@ foo (int a) - return a + 1; - } - --/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */ -+/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */ --- -2.33.0 - diff --git a/0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch b/0119-aarch64-Explicitly-record-probe-registers-in-frame-info.patch similarity index 91% rename from 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch rename to 0119-aarch64-Explicitly-record-probe-registers-in-frame-info.patch index 6fe7afa..be81fea 100644 --- a/0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch +++ b/0119-aarch64-Explicitly-record-probe-registers-in-frame-info.patch @@ -1,10 +1,7 @@ -From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001 +From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:12 +0100 -Subject: [PATCH 100/157] [Backport][SME] aarch64: Explicitly record probe - registers in frame info - -Reference: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f +Date: Tue, 12 Sep 2023 16:08:56 +0100 +Subject: [PATCH] aarch64: Explicitly record probe registers in frame info The stack frame is currently divided into three areas: @@ -75,10 +72,10 @@ gcc/testsuite/ 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 6d835dc8f..dd80ceba8 100644 +index 8abf3d7a1e2b..a8d907df8843 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void) +@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void) && !crtl->abi->clobbers_full_reg_p (regno)) frame.reg_offset[regno] = SLOT_REQUIRED; @@ -95,7 +92,7 @@ index 6d835dc8f..dd80ceba8 100644 /* Now assign stack slots for the registers. Start with the predicate registers, since predicate LDR and STR have a relatively small -@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void) +@@ -8226,6 +8222,8 @@ aarch64_layout_frame (void) for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { @@ -104,7 +101,7 @@ index 6d835dc8f..dd80ceba8 100644 frame.reg_offset[regno] = offset; offset += BYTES_PER_SVE_PRED; } -@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void) +@@ -8263,6 +8261,8 @@ aarch64_layout_frame (void) for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { @@ -113,7 +110,7 @@ index 6d835dc8f..dd80ceba8 100644 frame.reg_offset[regno] = offset; offset += vector_save_size; } -@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void) +@@ -8272,10 +8272,18 @@ aarch64_layout_frame (void) frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs; bool saves_below_hard_fp_p = maybe_ne (frame.below_hard_fp_saved_regs_size, 0); @@ -132,7 +129,7 @@ index 6d835dc8f..dd80ceba8 100644 frame.reg_offset[regno] = offset; if (frame.wb_push_candidate1 == 
INVALID_REGNUM) frame.wb_push_candidate1 = regno; -@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void) +@@ -8309,6 +8317,8 @@ aarch64_layout_frame (void) for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { @@ -141,7 +138,7 @@ index 6d835dc8f..dd80ceba8 100644 /* If there is an alignment gap between integer and fp callee-saves, allocate the last fp register to it if possible. */ if (regno == last_fp_reg -@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void) +@@ -8332,6 +8342,17 @@ aarch64_layout_frame (void) offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); frame.saved_regs_size = offset - frame.bytes_below_saved_regs; @@ -159,7 +156,7 @@ index 6d835dc8f..dd80ceba8 100644 offset += get_frame_size (); offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void) +@@ -8462,6 +8483,25 @@ aarch64_layout_frame (void) frame.final_adjust = frame.bytes_below_saved_regs; } @@ -185,7 +182,7 @@ index 6d835dc8f..dd80ceba8 100644 /* Make sure the individual adjustments add up to the full frame size. */ gcc_assert (known_eq (frame.initial_adjust + frame.callee_adjust -@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void) +@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void) poly_int64 offset = frame.reg_offset[regno]; @@ -199,7 +196,7 @@ index 6d835dc8f..dd80ceba8 100644 /* Get the offset relative to the register we'll use. 
*/ if (frame_pointer_needed) offset -= frame.bytes_below_hard_fp; -@@ -9680,6 +9713,13 @@ aarch64_get_separate_components (void) +@@ -9080,6 +9113,13 @@ aarch64_get_separate_components (void) bitmap_clear_bit (components, LR_REGNUM); bitmap_clear_bit (components, SP_REGNUM); @@ -213,7 +210,7 @@ index 6d835dc8f..dd80ceba8 100644 return components; } -@@ -10226,8 +10266,8 @@ aarch64_epilogue_uses (int regno) +@@ -9616,8 +9656,8 @@ aarch64_epilogue_uses (int regno) When probing is needed, we emit a probe at the start of the prologue and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter. @@ -225,10 +222,10 @@ index 6d835dc8f..dd80ceba8 100644 For outgoing arguments we probe if the size is larger than 1KB, such that the ABI specified buffer is maintained for the next callee. diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index f340237d0..af480d9e8 100644 +index b61358370732..46d4693e2064 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -887,6 +887,14 @@ struct GTY (()) aarch64_frame +@@ -957,6 +957,14 @@ struct GTY (()) aarch64_frame This is the register they should use. */ unsigned spare_pred_reg; @@ -244,7 +241,7 @@ index f340237d0..af480d9e8 100644 /* True if shadow call stack should be enabled for the current function. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c -index 3e01ec36c..3530a0d50 100644 +index 3e01ec36c3a4..3530a0d504ba 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c @@ -11,11 +11,10 @@ @@ -276,5 +273,5 @@ index 3e01ec36c..3530a0d50 100644 ** ldr x24, \[sp\], 32 ** ret -- -2.33.0 +2.43.5 diff --git a/0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch b/0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch deleted file mode 100644 index 619342b..0000000 --- a/0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:55 +0100 -Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static - data - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0 - -Just a minor patch to avoid having to construct std::strings -in static data. - -gcc/ - * common/config/aarch64/aarch64-common.cc (processor_name_to_arch) - (arch_to_arch_name): Use const char * instead of std::string. 
---- - gcc/common/config/aarch64/aarch64-common.cc | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 057dc094d..2bdf51b8b 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions[] = - - struct processor_name_to_arch - { -- const std::string processor_name; -+ const char *const processor_name; - const enum aarch64_arch arch; - const uint64_t flags; - }; -@@ -231,7 +231,7 @@ struct processor_name_to_arch - struct arch_to_arch_name - { - const enum aarch64_arch arch; -- const std::string arch_name; -+ const char *const arch_name; - const uint64_t flags; - }; - --- -2.33.0 - diff --git a/0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch b/0120-aarch64-Remove-below-hard-fp-saved-regs-size.patch similarity index 87% rename from 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch rename to 0120-aarch64-Remove-below-hard-fp-saved-regs-size.patch index 26cce09..7d97456 100644 --- a/0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch +++ b/0120-aarch64-Remove-below-hard-fp-saved-regs-size.patch @@ -1,10 +1,7 @@ -From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001 +From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:12 +0100 -Subject: [PATCH 101/157] [Backport][SME] aarch64: Remove - below_hard_fp_saved_regs_size - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1 +Date: Tue, 12 Sep 2023 16:08:56 +0100 +Subject: [PATCH] aarch64: Remove below_hard_fp_saved_regs_size After previous patches, it's no longer necessary to store saved_regs_size and below_hard_fp_saved_regs_size in the frame info. 
@@ -21,10 +18,10 @@ gcc/ 2 files changed, 21 insertions(+), 31 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index dd80ceba8..0894ed325 100644 +index a8d907df8843..ac3d3b336a37 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void) +@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void) /* OFFSET is now the offset of the hard frame pointer from the bottom of the callee save area. */ @@ -36,7 +33,7 @@ index dd80ceba8..0894ed325 100644 gcc_assert (!saves_below_hard_fp_p || (frame.sve_save_and_probe != INVALID_REGNUM && known_eq (frame.reg_offset[frame.sve_save_and_probe], -@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void) +@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void) offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); @@ -48,7 +45,7 @@ index dd80ceba8..0894ed325 100644 || (frame.hard_fp_save_and_probe != INVALID_REGNUM && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe], frame.bytes_below_hard_fp))); -@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void) +@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void) The saving of the bottommost register counts as an implicit probe, which allows us to maintain the invariant described in the comment at expand_prologue. 
*/ @@ -57,7 +54,7 @@ index dd80ceba8..0894ed325 100644 offset += get_frame_size (); offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void) +@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void) HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp; HOST_WIDE_INT const_saved_regs_size; @@ -66,7 +63,7 @@ index dd80ceba8..0894ed325 100644 frame.initial_adjust = frame.frame_size; else if (frame.frame_size.is_constant (&const_size) && const_size < max_push_offset -@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void) +@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void) frame.callee_adjust = const_size; } else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs) @@ -75,7 +72,7 @@ index dd80ceba8..0894ed325 100644 && const_below_saved_regs + const_saved_regs_size < 512 /* We could handle this case even with data below the saved registers, provided that that data left us with valid offsets -@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void) +@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void) frame.initial_adjust = frame.frame_size; } else if (saves_below_hard_fp_p @@ -85,7 +82,7 @@ index dd80ceba8..0894ed325 100644 { /* Frame in which all saves are SVE saves: -@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void) +@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void) [save SVE registers relative to SP] sub sp, sp, bytes_below_saved_regs */ frame.callee_adjust = const_above_fp; @@ -94,7 +91,7 @@ index dd80ceba8..0894ed325 100644 frame.final_adjust = frame.bytes_below_saved_regs; } else -@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void) +@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void) [save SVE registers relative to SP] sub sp, sp, bytes_below_saved_regs */ frame.initial_adjust = frame.bytes_above_hard_fp; @@ -103,7 +100,7 @@ index dd80ceba8..0894ed325 100644 frame.final_adjust = frame.bytes_below_saved_regs; } -@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno) +@@ -9621,17 +9618,17 @@ 
aarch64_epilogue_uses (int regno) | local variables | <-- frame_pointer_rtx | | +-------------------------------+ @@ -133,10 +130,10 @@ index dd80ceba8..0894ed325 100644 | dynamic allocation | +-------------------------------+ diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index af480d9e8..292ef2eec 100644 +index 46d4693e2064..01f7751bc783 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h -@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame +@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame STACK_BOUNDARY. */ HOST_WIDE_INT saved_varargs_size; @@ -156,5 +153,5 @@ index af480d9e8..292ef2eec 100644 of the outgoing arguments) and the hard frame pointer. This value is always a multiple of STACK_BOUNDARY. */ -- -2.33.0 +2.43.5 diff --git a/0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch b/0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch deleted file mode 100644 index 4ababc2..0000000 --- a/0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch +++ /dev/null @@ -1,195 +0,0 @@ -From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:55 +0100 -Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of - option-related data - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2 - -Some of the option structures have all-const member variables. -That doesn't seem necessary: we can just use const on the objects -that are supposed to be read-only. - -Also, with the new, more C++-heavy option handling, it seems -better to use constexpr for the static data, to make sure that -we're not adding unexpected overhead. - -gcc/ - * common/config/aarch64/aarch64-common.cc (aarch64_option_extension) - (processor_name_to_arch, arch_to_arch_name): Remove const from - member variables. - (all_extensions, all_cores, all_architectures): Make a constexpr. 
- * config/aarch64/aarch64.cc (processor): Remove const from - member variables. - (all_architectures): Make a constexpr. - * config/aarch64/driver-aarch64.cc (aarch64_core_data) - (aarch64_arch_driver_info): Remove const from member variables. - (aarch64_cpu_data, aarch64_arches): Make a constexpr. - (get_arch_from_id): Return a pointer to const. - (host_detect_local_cpu): Update accordingly. ---- - gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++----------- - gcc/config/aarch64/aarch64.cc | 14 +++++------ - gcc/config/aarch64/driver-aarch64.cc | 15 ++++++------ - 3 files changed, 27 insertions(+), 28 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 2bdf51b8b..ac3486d71 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts, - /* An ISA extension in the co-processor and main instruction set space. */ - struct aarch64_option_extension - { -- const char *const name; -- const uint64_t flag_canonical; -- const uint64_t flags_on; -- const uint64_t flags_off; -+ const char *name; -+ uint64_t flag_canonical; -+ uint64_t flags_on; -+ uint64_t flags_off; - }; - - /* ISA extensions in AArch64. 
*/ --static const struct aarch64_option_extension all_extensions[] = -+static constexpr aarch64_option_extension all_extensions[] = - { - #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ - {NAME, AARCH64_FL_##IDENT, \ -@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] = - - struct processor_name_to_arch - { -- const char *const processor_name; -- const enum aarch64_arch arch; -- const uint64_t flags; -+ const char *processor_name; -+ aarch64_arch arch; -+ uint64_t flags; - }; - - struct arch_to_arch_name - { -- const enum aarch64_arch arch; -- const char *const arch_name; -- const uint64_t flags; -+ aarch64_arch arch; -+ const char *arch_name; -+ uint64_t flags; - }; - - /* Map processor names to the architecture revision they implement and - the default set of architectural feature flags they support. */ --static const struct processor_name_to_arch all_cores[] = -+static constexpr processor_name_to_arch all_cores[] = - { - #define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \ - {NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT}, -@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] = - }; - - /* Map architecture revisions to their string representation. */ --static const struct arch_to_arch_name all_architectures[] = -+static constexpr arch_to_arch_name all_architectures[] = - { - #define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \ - {AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable}, -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 1363873b1..71db7ace1 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions[] = - /* A processor implementing AArch64. 
*/ - struct processor - { -- const char *const name; -- enum aarch64_processor ident; -- enum aarch64_processor sched_core; -- enum aarch64_arch arch; -- const uint64_t flags; -- const struct tune_params *const tune; -+ const char *name; -+ aarch64_processor ident; -+ aarch64_processor sched_core; -+ aarch64_arch arch; -+ uint64_t flags; -+ const tune_params *tune; - }; - - /* Architectures implementing AArch64. */ --static const struct processor all_architectures[] = -+static constexpr processor all_architectures[] = - { - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \ - {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \ -diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc -index ddfc9451f..ee9cb65a5 100644 ---- a/gcc/config/aarch64/driver-aarch64.cc -+++ b/gcc/config/aarch64/driver-aarch64.cc -@@ -50,7 +50,7 @@ struct aarch64_core_data - unsigned char implementer_id; /* Exactly 8 bits */ - unsigned int part_no; /* 12 bits + 12 bits */ - unsigned variant; -- const uint64_t flags; -+ uint64_t flags; - }; - - #define AARCH64_BIG_LITTLE(BIG, LITTLE) \ -@@ -64,7 +64,7 @@ struct aarch64_core_data - #define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \ - { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT }, - --static struct aarch64_core_data aarch64_cpu_data[] = -+static constexpr aarch64_core_data aarch64_cpu_data[] = - { - #include "aarch64-cores.def" - { NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 } -@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info - { - const char* id; - const char* name; -- const uint64_t flags; -+ uint64_t flags; - }; - - /* Skip the leading "V" in the architecture name. 
*/ - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ - { #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable }, - --static struct aarch64_arch_driver_info aarch64_arches[] = -+static constexpr aarch64_arch_driver_info aarch64_arches[] = - { - #include "aarch64-arches.def" - {NULL, NULL, 0} -@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] = - /* Return an aarch64_arch_driver_info for the architecture described - by ID, or NULL if ID describes something we don't know about. */ - --static struct aarch64_arch_driver_info* -+static const aarch64_arch_driver_info * - get_arch_from_id (const char* id) - { - unsigned int i = 0; -@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv) - - if (aarch64_cpu_data[i].name == NULL) - { -- aarch64_arch_driver_info* arch_info -- = get_arch_from_id (DEFAULT_ARCH); -+ auto arch_info = get_arch_from_id (DEFAULT_ARCH); - - gcc_assert (arch_info); - -@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv) - else if (arch) - { - const char *arch_id = aarch64_cpu_data[i].arch; -- aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id); -+ auto arch_info = get_arch_from_id (arch_id); - - /* We got some arch indentifier that's not in aarch64-arches.def? 
*/ - if (!arch_info) --- -2.33.0 - diff --git a/0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch b/0121-aarch64-Make-stack-smash-canary-protect-saved-registers.patch similarity index 90% rename from 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch rename to 0121-aarch64-Make-stack-smash-canary-protect-saved-registers.patch index c0d8bf6..96f315f 100644 --- a/0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch +++ b/0121-aarch64-Make-stack-smash-canary-protect-saved-registers.patch @@ -1,10 +1,7 @@ -From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001 +From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001 From: Richard Sandiford -Date: Tue, 12 Sep 2023 16:05:13 +0100 -Subject: [PATCH 102/157] [Backport][SME] aarch64: Make stack smash canary - protect saved registers - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf +Date: Tue, 12 Sep 2023 16:08:57 +0100 +Subject: [PATCH] aarch64: Make stack smash canary protect saved registers AArch64 normally puts the saved registers near the bottom of the frame, immediately above any dynamic allocations. But this means that a @@ -41,10 +38,10 @@ gcc/testsuite/ create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 0894ed325..8d4dd2891 100644 +index ac3d3b336a37..96c3f48fdc49 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc -@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void) +@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void) return aarch64_use_frame_pointer; } @@ -65,7 +62,7 @@ index 0894ed325..8d4dd2891 100644 /* Mark the registers that need to be saved by the callee and calculate the size of the callee-saved registers area and frame record (both FP and LR may be omitted). 
*/ -@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void) +@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void) poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); bool frame_related_fp_reg_p = false; aarch64_frame &frame = cfun->machine->frame; @@ -73,7 +70,7 @@ index 0894ed325..8d4dd2891 100644 frame.emit_frame_chain = aarch64_needs_frame_chain (); -@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void) +@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void) && !crtl->abi->clobbers_full_reg_p (regno)) frame.reg_offset[regno] = SLOT_REQUIRED; @@ -90,7 +87,7 @@ index 0894ed325..8d4dd2891 100644 frame.bytes_below_saved_regs = offset; frame.sve_save_and_probe = INVALID_REGNUM; -@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void) +@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void) at expand_prologue. */ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0)); @@ -113,7 +110,7 @@ index 0894ed325..8d4dd2891 100644 frame.bytes_above_locals = frame.frame_size - top_of_locals; frame.initial_adjust = 0; -@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno) +@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno) | for register varargs | | | +-------------------------------+ @@ -126,7 +123,7 @@ index 0894ed325..8d4dd2891 100644 +-------------------------------+ | callee-saved registers | +-------------------------------+ -@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno) +@@ -9630,6 +9655,10 @@ aarch64_epilogue_uses (int regno) +-------------------------------+ | SVE predicate registers | +-------------------------------+ @@ -137,7 +134,7 @@ index 0894ed325..8d4dd2891 100644 | dynamic allocation | +-------------------------------+ | padding | -@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno) +@@ -9639,6 +9668,9 @@ aarch64_epilogue_uses (int regno) +-------------------------------+ | | <-- stack_pointer_rtx (aligned) @@ -147,18 +144,18 @@ index 0894ed325..8d4dd2891 100644 Dynamic stack allocations via alloca() decrease stack_pointer_rtx but leave 
frame_pointer_rtx and hard_frame_pointer_rtx unchanged. -@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void) +@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void) gcc_assert (known_eq (bytes_below_sp, final_adjust)); aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, !frame_pointer_needed, true); + if (emit_frame_chain && maybe_ne (final_adjust, 0)) -+ aarch64_emit_stack_tie (hard_frame_pointer_rtx); ++ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); } /* Return TRUE if we can use a simple_return insn. diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c new file mode 100644 -index 000000000..e71d820e3 +index 000000000000..e71d820e3654 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c @@ -0,0 +1,95 @@ @@ -259,7 +256,7 @@ index 000000000..e71d820e3 +} diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c new file mode 100644 -index 000000000..58f322aa4 +index 000000000000..58f322aa480a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c @@ -0,0 +1,33 @@ @@ -297,5 +294,5 @@ index 000000000..58f322aa4 + return 0; +} -- -2.33.0 +2.43.5 diff --git a/0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch b/0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch deleted file mode 100644 index 31f8b7a..0000000 --- a/0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch +++ /dev/null @@ -1,394 +0,0 @@ -From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:56 +0100 -Subject: [PATCH 023/157] [Backport][SME] aarch64: Make more use of - aarch64_feature_flags - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd - -A previous patch added a aarch64_feature_flags typedef, to abstract -the representation of the 
feature flags. This patch makes existing -code use the typedef too. Hope I've caught them all! - -gcc/ - * common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags - for feature flags throughout. - * config/aarch64/aarch64-protos.h: Likewise. - * config/aarch64/aarch64-sve-builtins.h: Likewise. - * config/aarch64/aarch64-sve-builtins.cc: Likewise. - * config/aarch64/aarch64.cc: Likewise. - * config/aarch64/aarch64.opt: Likewise. - * config/aarch64/driver-aarch64.cc: Likewise. ---- - gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++------- - gcc/config/aarch64/aarch64-protos.h | 5 ++-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++--------- - gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++--- - gcc/config/aarch64/aarch64.cc | 29 +++++++++++---------- - gcc/config/aarch64/aarch64.opt | 2 +- - gcc/config/aarch64/driver-aarch64.cc | 10 +++---- - 7 files changed, 56 insertions(+), 47 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index ac3486d71..3efa57b26 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts, - struct aarch64_option_extension - { - const char *name; -- uint64_t flag_canonical; -- uint64_t flags_on; -- uint64_t flags_off; -+ aarch64_feature_flags flag_canonical; -+ aarch64_feature_flags flags_on; -+ aarch64_feature_flags flags_off; - }; - - /* ISA extensions in AArch64. 
*/ -@@ -225,14 +225,14 @@ struct processor_name_to_arch - { - const char *processor_name; - aarch64_arch arch; -- uint64_t flags; -+ aarch64_feature_flags flags; - }; - - struct arch_to_arch_name - { - aarch64_arch arch; - const char *arch_name; -- uint64_t flags; -+ aarch64_feature_flags flags; - }; - - /* Map processor names to the architecture revision they implement and -@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures[] = - a copy of the string is created and stored to INVALID_EXTENSION. */ - - enum aarch64_parse_opt_result --aarch64_parse_extension (const char *str, uint64_t *isa_flags, -+aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags, - std::string *invalid_extension) - { - /* The extension string is parsed left to right. */ -@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec *candidates) - that all the "+" flags come before the "+no" flags. */ - - std::string --aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, -- uint64_t default_arch_flags) -+aarch64_get_extension_string_for_isa_flags -+ (aarch64_feature_flags isa_flags, -+ aarch64_feature_flags default_arch_flags) - { - std::string outstr = ""; - -@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name) - || a_to_an->arch == aarch64_no_arch) - fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); - -- uint64_t extensions = p_to_a->flags; -+ aarch64_feature_flags extensions = p_to_a->flags; - aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); - - std::string outstr = a_to_an->arch_name -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index e60ce3c36..ef84df731 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, - const struct cl_decoded_option *, location_t); - const char *aarch64_rewrite_selected_cpu 
(const char *name); - enum aarch64_parse_opt_result aarch64_parse_extension (const char *, -- uint64_t *, -+ aarch64_feature_flags *, - std::string *); - void aarch64_get_all_extension_candidates (auto_vec *candidates); --std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); -+std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags, -+ aarch64_feature_flags); - - rtl_opt_pass *make_pass_fma_steering (gcc::context *); - rtl_opt_pass *make_pass_track_speculation (gcc::context *); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index c06e99339..b927a886e 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -82,7 +82,7 @@ public: - - /* The architecture extensions that the function requires, as a set of - AARCH64_FL_* flags. */ -- uint64_t required_extensions; -+ aarch64_feature_flags required_extensions; - - /* True if the decl represents an overloaded function that needs to be - resolved by function_resolver. */ -@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl) - Report an error against LOCATION if not. 
*/ - static bool - check_required_extensions (location_t location, tree fndecl, -- uint64_t required_extensions) -+ aarch64_feature_flags required_extensions) - { -- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; -+ auto missing_extensions = required_extensions & ~aarch64_isa_flags; - if (missing_extensions == 0) - return check_required_registers (location, fndecl); - -- static const struct { uint64_t flag; const char *name; } extensions[] = { -+ static const struct { -+ aarch64_feature_flags flag; -+ const char *name; -+ } extensions[] = { - #define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \ - { AARCH64_FL_##IDENT, EXT_NAME }, - #include "aarch64-option-extensions.def" -@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance) - registered_function & - function_builder::add_function (const function_instance &instance, - const char *name, tree fntype, tree attrs, -- uint64_t required_extensions, -+ aarch64_feature_flags required_extensions, - bool overloaded_p, - bool placeholder_p) - { -@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance, - one-to-one mapping between "short" and "full" names, and if standard - overload resolution therefore isn't necessary. */ - void --function_builder::add_unique_function (const function_instance &instance, -- tree return_type, -- vec &argument_types, -- uint64_t required_extensions, -- bool force_direct_overloads) -+function_builder:: -+add_unique_function (const function_instance &instance, -+ tree return_type, -+ vec &argument_types, -+ aarch64_feature_flags required_extensions, -+ bool force_direct_overloads) - { - /* Add the function under its full (unique) name. */ - char *name = get_name (instance, false); -@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance, - features are available as part of resolving the function to the - relevant unique function. 
*/ - void --function_builder::add_overloaded_function (const function_instance &instance, -- uint64_t required_extensions) -+function_builder:: -+add_overloaded_function (const function_instance &instance, -+ aarch64_feature_flags required_extensions) - { - char *name = get_name (instance, true); - if (registered_function **map_value = m_overload_names.get (name)) -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 24594d584..63d1db776 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -263,7 +263,7 @@ struct function_group_info - - /* The architecture extensions that the functions require, as a set of - AARCH64_FL_* flags. */ -- uint64_t required_extensions; -+ aarch64_feature_flags required_extensions; - }; - - /* Describes a single fully-resolved function (i.e. one that has a -@@ -321,8 +321,9 @@ public: - ~function_builder (); - - void add_unique_function (const function_instance &, tree, -- vec &, uint64_t, bool); -- void add_overloaded_function (const function_instance &, uint64_t); -+ vec &, aarch64_feature_flags, bool); -+ void add_overloaded_function (const function_instance &, -+ aarch64_feature_flags); - void add_overloaded_functions (const function_group_info &, - mode_suffix_index); - -@@ -338,7 +339,7 @@ private: - - registered_function &add_function (const function_instance &, - const char *, tree, tree, -- uint64_t, bool, bool); -+ aarch64_feature_flags, bool, bool); - - /* The function type to use for functions that are resolved by - function_resolver. 
*/ -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 71db7ace1..8cb820767 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2929,7 +2929,7 @@ struct processor - aarch64_processor ident; - aarch64_processor sched_core; - aarch64_arch arch; -- uint64_t flags; -+ aarch64_feature_flags flags; - const tune_params *tune; - }; - -@@ -17428,7 +17428,8 @@ static void initialize_aarch64_code_model (struct gcc_options *); - - static enum aarch64_parse_opt_result - aarch64_parse_arch (const char *to_parse, const struct processor **res, -- uint64_t *isa_flags, std::string *invalid_extension) -+ aarch64_feature_flags *isa_flags, -+ std::string *invalid_extension) - { - const char *ext; - const struct processor *arch; -@@ -17451,7 +17452,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, - if (strlen (arch->name) == len - && strncmp (arch->name, to_parse, len) == 0) - { -- uint64_t isa_temp = arch->flags; -+ auto isa_temp = arch->flags; - - if (ext != NULL) - { -@@ -17483,7 +17484,8 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, - - static enum aarch64_parse_opt_result - aarch64_parse_cpu (const char *to_parse, const struct processor **res, -- uint64_t *isa_flags, std::string *invalid_extension) -+ aarch64_feature_flags *isa_flags, -+ std::string *invalid_extension) - { - const char *ext; - const struct processor *cpu; -@@ -17505,8 +17507,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res, - { - if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0) - { -- uint64_t isa_temp = cpu->flags; -- -+ auto isa_temp = cpu->flags; - - if (ext != NULL) - { -@@ -18137,7 +18138,7 @@ aarch64_print_hint_for_extensions (const std::string &str) - - static bool - aarch64_validate_mcpu (const char *str, const struct processor **res, -- uint64_t *isa_flags) -+ aarch64_feature_flags *isa_flags) - { - std::string invalid_extension; - enum 
aarch64_parse_opt_result parse_res -@@ -18351,7 +18352,7 @@ aarch64_validate_mbranch_protection (const char *const_str) - - static bool - aarch64_validate_march (const char *str, const struct processor **res, -- uint64_t *isa_flags) -+ aarch64_feature_flags *isa_flags) - { - std::string invalid_extension; - enum aarch64_parse_opt_result parse_res -@@ -18441,8 +18442,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) - static void - aarch64_override_options (void) - { -- uint64_t cpu_isa = 0; -- uint64_t arch_isa = 0; -+ aarch64_feature_flags cpu_isa = 0; -+ aarch64_feature_flags arch_isa = 0; - aarch64_isa_flags = 0; - - const struct processor *cpu = NULL; -@@ -18890,7 +18891,7 @@ static bool - aarch64_handle_attr_isa_flags (char *str) - { - enum aarch64_parse_opt_result parse_res; -- uint64_t isa_flags = aarch64_isa_flags; -+ auto isa_flags = aarch64_isa_flags; - - /* We allow "+nothing" in the beginning to clear out all architectural - features if the user wants to handpick specific features. */ -@@ -19162,7 +19163,7 @@ aarch64_process_target_attr (tree args) - { - /* Check if token is possibly an arch extension without - leading '+'. 
*/ -- uint64_t isa_temp = 0; -+ aarch64_feature_flags isa_temp = 0; - auto with_plus = std::string ("+") + token; - enum aarch64_parse_opt_result ext_res - = aarch64_parse_extension (with_plus.c_str (), &isa_temp, nullptr); -@@ -22771,7 +22772,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, - const struct processor *this_arch - = aarch64_get_arch (targ_options->x_selected_arch); - -- uint64_t isa_flags = targ_options->x_aarch64_isa_flags; -+ auto isa_flags = targ_options->x_aarch64_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (isa_flags, - this_arch->flags); -@@ -22901,7 +22902,7 @@ aarch64_start_file (void) - - const struct processor *default_arch - = aarch64_get_arch (default_options->x_selected_arch); -- uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; -+ auto default_isa_flags = default_options->x_aarch64_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (default_isa_flags, - default_arch->flags); -diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt -index 836a3c784..47ec7824f 100644 ---- a/gcc/config/aarch64/aarch64.opt -+++ b/gcc/config/aarch64/aarch64.opt -@@ -28,7 +28,7 @@ TargetVariable - enum aarch64_arch selected_arch = aarch64_no_arch - - TargetVariable --uint64_t aarch64_isa_flags = 0 -+aarch64_feature_flags aarch64_isa_flags = 0 - - TargetVariable - unsigned aarch64_enable_bti = 2 -diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc -index ee9cb65a5..2ae47c020 100644 ---- a/gcc/config/aarch64/driver-aarch64.cc -+++ b/gcc/config/aarch64/driver-aarch64.cc -@@ -31,7 +31,7 @@ - struct aarch64_arch_extension - { - const char *ext; -- uint64_t flag; -+ aarch64_feature_flags flag; - const char *feat_string; - }; - -@@ -50,7 +50,7 @@ struct aarch64_core_data - unsigned char implementer_id; /* Exactly 8 bits */ - unsigned int part_no; /* 12 bits + 12 bits */ - unsigned variant; -- uint64_t 
flags; -+ aarch64_feature_flags flags; - }; - - #define AARCH64_BIG_LITTLE(BIG, LITTLE) \ -@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info - { - const char* id; - const char* name; -- uint64_t flags; -+ aarch64_feature_flags flags; - }; - - /* Skip the leading "V" in the architecture name. */ -@@ -261,8 +261,8 @@ host_detect_local_cpu (int argc, const char **argv) - unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS }; - unsigned int n_variants = 0; - bool processed_exts = false; -- uint64_t extension_flags = 0; -- uint64_t default_flags = 0; -+ aarch64_feature_flags extension_flags = 0; -+ aarch64_feature_flags default_flags = 0; - std::string buf; - size_t sep_pos = -1; - char *fcpu_info; --- -2.33.0 - diff --git a/0253-aarch64-Fix-return-register-handling-in-untyped_call.patch b/0122-aarch64-Fix-return-register-handling-in-untyped_call.patch similarity index 100% rename from 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch rename to 0122-aarch64-Fix-return-register-handling-in-untyped_call.patch diff --git a/0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch b/0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch deleted file mode 100644 index ec1c5f6..0000000 --- a/0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch +++ /dev/null @@ -1,70 +0,0 @@ -From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:56 +0100 -Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of - flags_on/off fields - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971 - -After previous changes, it's more convenient if the flags_on and -flags_off fields of all_extensions include the feature flag itself. - -gcc/ - * common/config/aarch64/aarch64-common.cc (all_extensions): - Include the feature flag in flags_on and flags_off. - (aarch64_parse_extension): Update accordingly. 
- (aarch64_get_extension_string_for_isa_flags): Likewise. ---- - gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++-------- - 1 file changed, 6 insertions(+), 8 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 3efa57b26..752ba5632 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -213,10 +213,8 @@ struct aarch64_option_extension - static constexpr aarch64_option_extension all_extensions[] = - { - #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \ -- {NAME, AARCH64_FL_##IDENT, \ -- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \ -- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \ -- & ~AARCH64_FL_##IDENT}, -+ {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \ -+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT)}, - #include "config/aarch64/aarch64-option-extensions.def" - {NULL, 0, 0, 0} - }; -@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags, - { - /* Add or remove the extension. 
*/ - if (adding_ext) -- *isa_flags |= (opt->flags_on | opt->flag_canonical); -+ *isa_flags |= opt->flags_on; - else -- *isa_flags &= ~(opt->flags_off | opt->flag_canonical); -+ *isa_flags &= ~opt->flags_off; - break; - } - } -@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags - - if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags) - { -- current_flags |= opt.flag_canonical | opt.flags_on; -+ current_flags |= opt.flags_on; - added |= opt.flag_canonical; - } - } -@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags - for (auto &opt : all_extensions) - if (opt.flag_canonical & current_flags & ~isa_flags) - { -- current_flags &= ~(opt.flag_canonical | opt.flags_off); -+ current_flags &= ~opt.flags_off; - outstr += "+no"; - outstr += opt.name; - } --- -2.33.0 - diff --git a/0254-aarch64-Fix-loose-ldpstp-check.patch b/0123-aarch64-Fix-loose-ldpstp-check.patch similarity index 100% rename from 0254-aarch64-Fix-loose-ldpstp-check.patch rename to 0123-aarch64-Fix-loose-ldpstp-check.patch diff --git a/0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch b/0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch deleted file mode 100644 index 55c27b3..0000000 --- a/0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch +++ /dev/null @@ -1,370 +0,0 @@ -From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:57 +0100 -Subject: [PATCH 025/157] [Backport][SME] aarch64: Tweak handling of - -mgeneral-regs-only - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0 - --mgeneral-regs-only is effectively "+nofp for the compiler without -changing the assembler's ISA flags". 
Currently that's implemented -by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on -!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP -registers depend (directly or indirectly) on one of those three TARGET -macros. The problem is that it's easy to forgot to do the last bit. - -This patch instead represents the distinction between "assemnbler -ISA flags" and "compiler ISA flags" more directly, funnelling -all updates through a new function that sets both sets of flags -together. - -gcc/ - * config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable. - * config/aarch64/aarch64.h (aarch64_asm_isa_flags) - (aarch64_isa_flags): Redefine as read-only macros. - (TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on - !TARGET_GENERAL_REGS_ONLY. - * common/config/aarch64/aarch64-common.cc - (aarch64_set_asm_isa_flags): New function. - (aarch64_handle_option): Call it when updating -mgeneral-regs. - * config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace - m_old_isa_flags with m_old_asm_isa_flags. - (aarch64_set_asm_isa_flags): Declare. - * config/aarch64/aarch64-builtins.cc - (aarch64_simd_switcher::aarch64_simd_switcher) - (aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore - aarch64_asm_isa_flags instead of aarch64_isa_flags. - * config/aarch64/aarch64-sve-builtins.cc - (check_required_extensions): Use aarch64_asm_isa_flags instead - of aarch64_isa_flags. - * config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function. - (aarch64_override_options, aarch64_handle_attr_arch) - (aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use - aarch64_set_asm_isa_flags to set the ISA flags. - (aarch64_option_print, aarch64_declare_function_name) - (aarch64_start_file): Use aarch64_asm_isa_flags instead - of aarch64_isa_flags. - (aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as - aarch64_isa_flags. 
---- - gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++ - gcc/config/aarch64/aarch64-builtins.cc | 6 +-- - gcc/config/aarch64/aarch64-protos.h | 5 ++- - gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- - gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++------- - gcc/config/aarch64/aarch64.h | 17 ++++++-- - gcc/config/aarch64/aarch64.opt | 3 ++ - 7 files changed, 68 insertions(+), 22 deletions(-) - -diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc -index 752ba5632..c64b4987e 100644 ---- a/gcc/common/config/aarch64/aarch64-common.cc -+++ b/gcc/common/config/aarch64/aarch64-common.cc -@@ -137,6 +137,17 @@ reset_tsv110_option () - } - } - -+/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update -+ OPTS->x_aarch64_isa_flags accordingly. */ -+void -+aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags) -+{ -+ opts->x_aarch64_asm_isa_flags = flags; -+ opts->x_aarch64_isa_flags = flags; -+ if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY) -+ opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP); -+} -+ - /* Implement TARGET_HANDLE_OPTION. - This function handles the target specific options for CPU/target selection. - -@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts, - - case OPT_mgeneral_regs_only: - opts->x_target_flags |= MASK_GENERAL_REGS_ONLY; -+ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags); - return true; - - case OPT_mfix_cortex_a53_835769: -diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc -index 42276e7ca..015e9d975 100644 ---- a/gcc/config/aarch64/aarch64-builtins.cc -+++ b/gcc/config/aarch64/aarch64-builtins.cc -@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t) - /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD - set. 
*/ - aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags) -- : m_old_isa_flags (aarch64_isa_flags), -+ : m_old_asm_isa_flags (aarch64_asm_isa_flags), - m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY) - { - /* Changing the ISA flags should be enough here. We shouldn't need to - pay the compile-time cost of a full target switch. */ -- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags; - global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY; -+ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags); - } - - aarch64_simd_switcher::~aarch64_simd_switcher () - { - if (m_old_general_regs_only) - global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY; -- aarch64_isa_flags = m_old_isa_flags; -+ aarch64_set_asm_isa_flags (m_old_asm_isa_flags); - } - - /* Implement #pragma GCC aarch64 "arm_neon.h". */ -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index ef84df731..86e444a60 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -747,7 +747,7 @@ public: - ~aarch64_simd_switcher (); - - private: -- unsigned long m_old_isa_flags; -+ unsigned long m_old_asm_isa_flags; - bool m_old_general_regs_only; - }; - -@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx, - machine_mode, bool, - aarch64_addr_query_type = ADDR_QUERY_M); - -+void aarch64_set_asm_isa_flags (aarch64_feature_flags); -+ - /* Defined in common/config/aarch64-common.cc. 
*/ -+void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags); - bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, - const struct cl_decoded_option *, location_t); - const char *aarch64_rewrite_selected_cpu (const char *name); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index b927a886e..a70e3a6b4 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -696,7 +696,7 @@ static bool - check_required_extensions (location_t location, tree fndecl, - aarch64_feature_flags required_extensions) - { -- auto missing_extensions = required_extensions & ~aarch64_isa_flags; -+ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags; - if (missing_extensions == 0) - return check_required_registers (location, fndecl); - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 8cb820767..3e83e48ec 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) - return (int) value / 64; - } - -+/* Set the global aarch64_asm_isa_flags to FLAGS and update -+ aarch64_isa_flags accordingly. */ -+ -+void -+aarch64_set_asm_isa_flags (aarch64_feature_flags flags) -+{ -+ aarch64_set_asm_isa_flags (&global_options, flags); -+} -+ - /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning - and is used to parse the -m{cpu,tune,arch} strings and setup the initial - tuning structs. In particular it must set selected_tune and -- aarch64_isa_flags that define the available ISA features and tuning -+ aarch64_asm_isa_flags that define the available ISA features and tuning - decisions. It must also set selected_arch as this will be used to - output the .arch asm tags for each function. 
*/ - -@@ -18444,7 +18453,7 @@ aarch64_override_options (void) - { - aarch64_feature_flags cpu_isa = 0; - aarch64_feature_flags arch_isa = 0; -- aarch64_isa_flags = 0; -+ aarch64_set_asm_isa_flags (0); - - const struct processor *cpu = NULL; - const struct processor *arch = NULL; -@@ -18484,25 +18493,25 @@ aarch64_override_options (void) - } - - selected_arch = arch->arch; -- aarch64_isa_flags = arch_isa; -+ aarch64_set_asm_isa_flags (arch_isa); - } - else if (cpu) - { - selected_arch = cpu->arch; -- aarch64_isa_flags = cpu_isa; -+ aarch64_set_asm_isa_flags (cpu_isa); - } - else if (arch) - { - cpu = &all_cores[arch->ident]; - selected_arch = arch->arch; -- aarch64_isa_flags = arch_isa; -+ aarch64_set_asm_isa_flags (arch_isa); - } - else - { - /* No -mcpu or -march specified, so use the default CPU. */ - cpu = &all_cores[TARGET_CPU_DEFAULT]; - selected_arch = cpu->arch; -- aarch64_isa_flags = cpu->flags; -+ aarch64_set_asm_isa_flags (cpu->flags); - } - - selected_tune = tune ? tune->ident : cpu->ident; -@@ -18644,7 +18653,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) - = aarch64_get_tune_cpu (ptr->x_selected_tune); - const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch); - std::string extension -- = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags, -+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_asm_isa_flags, - arch->flags); - - fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name); -@@ -18752,13 +18761,15 @@ aarch64_handle_attr_arch (const char *str) - { - const struct processor *tmp_arch = NULL; - std::string invalid_extension; -+ aarch64_feature_flags tmp_flags; - enum aarch64_parse_opt_result parse_res -- = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension); -+ = aarch64_parse_arch (str, &tmp_arch, &tmp_flags, &invalid_extension); - - if (parse_res == AARCH64_PARSE_OK) - { - gcc_assert (tmp_arch); - selected_arch = tmp_arch->arch; -+ 
aarch64_set_asm_isa_flags (tmp_flags); - return true; - } - -@@ -18790,14 +18801,16 @@ aarch64_handle_attr_cpu (const char *str) - { - const struct processor *tmp_cpu = NULL; - std::string invalid_extension; -+ aarch64_feature_flags tmp_flags; - enum aarch64_parse_opt_result parse_res -- = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension); -+ = aarch64_parse_cpu (str, &tmp_cpu, &tmp_flags, &invalid_extension); - - if (parse_res == AARCH64_PARSE_OK) - { - gcc_assert (tmp_cpu); - selected_tune = tmp_cpu->ident; - selected_arch = tmp_cpu->arch; -+ aarch64_set_asm_isa_flags (tmp_flags); - return true; - } - -@@ -18891,7 +18904,7 @@ static bool - aarch64_handle_attr_isa_flags (char *str) - { - enum aarch64_parse_opt_result parse_res; -- auto isa_flags = aarch64_isa_flags; -+ auto isa_flags = aarch64_asm_isa_flags; - - /* We allow "+nothing" in the beginning to clear out all architectural - features if the user wants to handpick specific features. */ -@@ -18906,7 +18919,7 @@ aarch64_handle_attr_isa_flags (char *str) - - if (parse_res == AARCH64_PARSE_OK) - { -- aarch64_isa_flags = isa_flags; -+ aarch64_set_asm_isa_flags (isa_flags); - return true; - } - -@@ -19328,8 +19341,12 @@ aarch64_can_inline_p (tree caller, tree callee) - : target_option_default_node); - - /* Callee's ISA flags should be a subset of the caller's. 
*/ -+ if ((caller_opts->x_aarch64_asm_isa_flags -+ & callee_opts->x_aarch64_asm_isa_flags) -+ != callee_opts->x_aarch64_asm_isa_flags) -+ return false; - if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) -- != callee_opts->x_aarch64_isa_flags) -+ != callee_opts->x_aarch64_isa_flags) - return false; - - /* Allow non-strict aligned functions inlining into strict -@@ -22772,7 +22789,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, - const struct processor *this_arch - = aarch64_get_arch (targ_options->x_selected_arch); - -- auto isa_flags = targ_options->x_aarch64_isa_flags; -+ auto isa_flags = targ_options->x_aarch64_asm_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (isa_flags, - this_arch->flags); -@@ -22902,7 +22919,7 @@ aarch64_start_file (void) - - const struct processor *default_arch - = aarch64_get_arch (default_options->x_selected_arch); -- auto default_isa_flags = default_options->x_aarch64_isa_flags; -+ auto default_isa_flags = default_options->x_aarch64_asm_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (default_isa_flags, - default_arch->flags); -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 50a2ef444..521031efe 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -22,6 +22,17 @@ - #ifndef GCC_AARCH64_H - #define GCC_AARCH64_H - -+/* Make these flags read-only so that all uses go via -+ aarch64_set_asm_isa_flags. */ -+#ifndef GENERATOR_FILE -+#undef aarch64_asm_isa_flags -+#define aarch64_asm_isa_flags \ -+ ((aarch64_feature_flags) global_options.x_aarch64_asm_isa_flags) -+#undef aarch64_isa_flags -+#define aarch64_isa_flags \ -+ ((aarch64_feature_flags) global_options.x_aarch64_isa_flags) -+#endif -+ - /* Target CPU builtins. 
*/ - #define TARGET_CPU_CPP_BUILTINS() \ - aarch64_cpu_cpp_builtins (pfile) -@@ -51,8 +62,8 @@ - - /* AdvSIMD is supported in the default configuration, unless disabled by - -mgeneral-regs-only or by the +nosimd extension. */ --#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD) --#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP) -+#define TARGET_SIMD (AARCH64_ISA_SIMD) -+#define TARGET_FLOAT (AARCH64_ISA_FP) - - #define UNITS_PER_WORD 8 - -@@ -242,7 +253,7 @@ enum class aarch64_feature : unsigned char { - #define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD) - - /* SVE instructions, enabled through +sve. */ --#define TARGET_SVE (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SVE) -+#define TARGET_SVE (AARCH64_ISA_SVE) - - /* SVE2 instructions, enabled through +sve2. */ - #define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2) -diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt -index 47ec7824f..5f507abd4 100644 ---- a/gcc/config/aarch64/aarch64.opt -+++ b/gcc/config/aarch64/aarch64.opt -@@ -27,6 +27,9 @@ enum aarch64_processor selected_tune = aarch64_none - TargetVariable - enum aarch64_arch selected_arch = aarch64_no_arch - -+TargetVariable -+aarch64_feature_flags aarch64_asm_isa_flags = 0 -+ - TargetVariable - aarch64_feature_flags aarch64_isa_flags = 0 - --- -2.33.0 - diff --git a/0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch b/0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch deleted file mode 100644 index f935069..0000000 --- a/0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch +++ /dev/null @@ -1,453 +0,0 @@ -From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:57 +0100 -Subject: [PATCH 026/157] [Backport][SME] aarch64: Remove redundant TARGET_* - checks - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3 - -After previous patches, it's 
possible to remove TARGET_* -options that are redundant due to (IMO) obvious dependencies. - -gcc/ - * config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4) - (TARGET_DOTPROD): Don't depend on TARGET_SIMD. - (TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test. - (TARGET_FP_F16INST): Don't depend on TARGET_FLOAT. - (TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend - on TARGET_SVE. - (TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3) - (TARGET_SVE2_SM4): Don't depend on TARGET_SVE2. - (TARGET_F32MM, TARGET_F64MM): Delete. - * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard - float macros with just TARGET_FLOAT rather than TARGET_FLOAT - || TARGET_SIMD. - * config/aarch64/aarch64-simd.md (copysign3): Depend - only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD. - (aarch64_crypto_aesv16qi): Depend only on TARGET_AES, - rather than TARGET_SIMD && TARGET_AES. - (aarch64_crypto_aesv16qi): Likewise. - (*aarch64_crypto_aese_fused): Likewise. - (*aarch64_crypto_aesd_fused): Likewise. - (aarch64_crypto_pmulldi): Likewise. - (aarch64_crypto_pmullv2di): Likewise. - (aarch64_crypto_sha1hsi): Likewise TARGET_SHA2. - (aarch64_crypto_sha1hv4si): Likewise. - (aarch64_be_crypto_sha1hv4si): Likewise. - (aarch64_crypto_sha1su1v4si): Likewise. - (aarch64_crypto_sha1v4si): Likewise. - (aarch64_crypto_sha1su0v4si): Likewise. - (aarch64_crypto_sha256hv4si): Likewise. - (aarch64_crypto_sha256su0v4si): Likewise. - (aarch64_crypto_sha256su1v4si): Likewise. - (aarch64_crypto_sha512hqv2di): Likewise TARGET_SHA3. - (aarch64_crypto_sha512su0qv2di): Likewise. - (aarch64_crypto_sha512su1qv2di, eor3q4): Likewise. - (aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq4): Likewise. - (aarch64_sm3ss1qv4si): Likewise TARGET_SM4. - (aarch64_sm3ttqv4si): Likewise. - (aarch64_sm3partwqv4si): Likewise. - (aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise. 
- * config/aarch64/aarch64.md (dihf2) - (copysign3, copysign3_insn) - (xorsign3): Remove redundant TARGET_FLOAT condition. ---- - gcc/config/aarch64/aarch64-c.cc | 2 +- - gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++--------------- - gcc/config/aarch64/aarch64.h | 30 ++++++++-------- - gcc/config/aarch64/aarch64.md | 8 ++--- - 4 files changed, 47 insertions(+), 49 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 18c9b975b..2dfe2b8f8 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - - aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile); - -- if (TARGET_FLOAT || TARGET_SIMD) -+ if (TARGET_FLOAT) - { - builtin_define_with_int_value ("__ARM_FP", 0x0E); - builtin_define ("__ARM_FP16_FORMAT_IEEE"); -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index de92802f5..a47b39281 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -693,7 +693,7 @@ - [(match_operand:VHSDF 0 "register_operand") - (match_operand:VHSDF 1 "register_operand") - (match_operand:VHSDF 2 "register_operand")] -- "TARGET_FLOAT && TARGET_SIMD" -+ "TARGET_SIMD" - { - rtx v_bitmask = gen_reg_rtx (mode); - int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; -@@ -8352,7 +8352,7 @@ - (match_operand:V16QI 1 "register_operand" "%0") - (match_operand:V16QI 2 "register_operand" "w"))] - CRYPTO_AES))] -- "TARGET_SIMD && TARGET_AES" -+ "TARGET_AES" - "aes\\t%0.16b, %2.16b" - [(set_attr "type" "crypto_aese")] - ) -@@ -8361,7 +8361,7 @@ - [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] - CRYPTO_AESMC))] -- "TARGET_SIMD && TARGET_AES" -+ "TARGET_AES" - "aes\\t%0.16b, %1.16b" - [(set_attr "type" "crypto_aesmc")] - ) -@@ -8380,7 +8380,7 @@ - (match_operand:V16QI 2 "register_operand" "w"))] - UNSPEC_AESE)] 
- UNSPEC_AESMC))] -- "TARGET_SIMD && TARGET_AES -+ "TARGET_AES - && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" - "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" - [(set_attr "type" "crypto_aese") -@@ -8401,7 +8401,7 @@ - (match_operand:V16QI 2 "register_operand" "w"))] - UNSPEC_AESD)] - UNSPEC_AESIMC))] -- "TARGET_SIMD && TARGET_AES -+ "TARGET_AES - && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" - "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" - [(set_attr "type" "crypto_aese") -@@ -8415,7 +8415,7 @@ - (unspec:SI [(match_operand:SI 1 - "register_operand" "w")] - UNSPEC_SHA1H))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha1h\\t%s0, %s1" - [(set_attr "type" "crypto_sha1_fast")] - ) -@@ -8425,7 +8425,7 @@ - (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") - (parallel [(const_int 0)]))] - UNSPEC_SHA1H))] -- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" -+ "TARGET_SHA2 && !BYTES_BIG_ENDIAN" - "sha1h\\t%s0, %s1" - [(set_attr "type" "crypto_sha1_fast")] - ) -@@ -8435,7 +8435,7 @@ - (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") - (parallel [(const_int 3)]))] - UNSPEC_SHA1H))] -- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" -+ "TARGET_SHA2 && BYTES_BIG_ENDIAN" - "sha1h\\t%s0, %s1" - [(set_attr "type" "crypto_sha1_fast")] - ) -@@ -8445,7 +8445,7 @@ - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "register_operand" "w")] - UNSPEC_SHA1SU1))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha1su1\\t%0.4s, %2.4s" - [(set_attr "type" "crypto_sha1_fast")] - ) -@@ -8456,7 +8456,7 @@ - (match_operand:SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - CRYPTO_SHA1))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha1\\t%q0, %s2, %3.4s" - [(set_attr "type" "crypto_sha1_slow")] - ) -@@ -8467,7 +8467,7 @@ - (match_operand:V4SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - UNSPEC_SHA1SU0))] -- 
"TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha1su0\\t%0.4s, %2.4s, %3.4s" - [(set_attr "type" "crypto_sha1_xor")] - ) -@@ -8480,7 +8480,7 @@ - (match_operand:V4SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - CRYPTO_SHA256))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha256h\\t%q0, %q2, %3.4s" - [(set_attr "type" "crypto_sha256_slow")] - ) -@@ -8490,7 +8490,7 @@ - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "register_operand" "w")] - UNSPEC_SHA256SU0))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha256su0\\t%0.4s, %2.4s" - [(set_attr "type" "crypto_sha256_fast")] - ) -@@ -8501,7 +8501,7 @@ - (match_operand:V4SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - UNSPEC_SHA256SU1))] -- "TARGET_SIMD && TARGET_SHA2" -+ "TARGET_SHA2" - "sha256su1\\t%0.4s, %2.4s, %3.4s" - [(set_attr "type" "crypto_sha256_slow")] - ) -@@ -8514,7 +8514,7 @@ - (match_operand:V2DI 2 "register_operand" "w") - (match_operand:V2DI 3 "register_operand" "w")] - CRYPTO_SHA512))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "sha512h\\t%q0, %q2, %3.2d" - [(set_attr "type" "crypto_sha512")] - ) -@@ -8524,7 +8524,7 @@ - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") - (match_operand:V2DI 2 "register_operand" "w")] - UNSPEC_SHA512SU0))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "sha512su0\\t%0.2d, %2.2d" - [(set_attr "type" "crypto_sha512")] - ) -@@ -8535,7 +8535,7 @@ - (match_operand:V2DI 2 "register_operand" "w") - (match_operand:V2DI 3 "register_operand" "w")] - UNSPEC_SHA512SU1))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "sha512su1\\t%0.2d, %2.2d, %3.2d" - [(set_attr "type" "crypto_sha512")] - ) -@@ -8549,7 +8549,7 @@ - (match_operand:VQ_I 2 "register_operand" "w") - (match_operand:VQ_I 3 "register_operand" "w")) - (match_operand:VQ_I 1 "register_operand" "w")))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "eor3\\t%0.16b, %1.16b, 
%2.16b, %3.16b" - [(set_attr "type" "crypto_sha3")] - ) -@@ -8561,7 +8561,7 @@ - (match_operand:V2DI 2 "register_operand" "w") - (const_int 1)) - (match_operand:V2DI 1 "register_operand" "w")))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "rax1\\t%0.2d, %1.2d, %2.2d" - [(set_attr "type" "crypto_sha3")] - ) -@@ -8573,7 +8573,7 @@ - (match_operand:V2DI 1 "register_operand" "%w") - (match_operand:V2DI 2 "register_operand" "w")) - (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "xar\\t%0.2d, %1.2d, %2.2d, %3" - [(set_attr "type" "crypto_sha3")] - ) -@@ -8585,7 +8585,7 @@ - (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) - (match_operand:VQ_I 2 "register_operand" "w")) - (match_operand:VQ_I 1 "register_operand" "w")))] -- "TARGET_SIMD && TARGET_SHA3" -+ "TARGET_SHA3" - "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" - [(set_attr "type" "crypto_sha3")] - ) -@@ -8598,7 +8598,7 @@ - (match_operand:V4SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - UNSPEC_SM3SS1))] -- "TARGET_SIMD && TARGET_SM4" -+ "TARGET_SM4" - "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s" - [(set_attr "type" "crypto_sm3")] - ) -@@ -8611,7 +8611,7 @@ - (match_operand:V4SI 3 "register_operand" "w") - (match_operand:SI 4 "aarch64_imm2" "Ui2")] - CRYPTO_SM3TT))] -- "TARGET_SIMD && TARGET_SM4" -+ "TARGET_SM4" - "sm3tt\\t%0.4s, %2.4s, %3.4s[%4]" - [(set_attr "type" "crypto_sm3")] - ) -@@ -8622,7 +8622,7 @@ - (match_operand:V4SI 2 "register_operand" "w") - (match_operand:V4SI 3 "register_operand" "w")] - CRYPTO_SM3PART))] -- "TARGET_SIMD && TARGET_SM4" -+ "TARGET_SM4" - "sm3partw\\t%0.4s, %2.4s, %3.4s" - [(set_attr "type" "crypto_sm3")] - ) -@@ -8634,7 +8634,7 @@ - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") - (match_operand:V4SI 2 "register_operand" "w")] - UNSPEC_SM4E))] -- "TARGET_SIMD && TARGET_SM4" -+ "TARGET_SM4" - "sm4e\\t%0.4s, %2.4s" - [(set_attr "type" "crypto_sm4")] - ) -@@ -8644,7 +8644,7 @@ - 
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w") - (match_operand:V4SI 2 "register_operand" "w")] - UNSPEC_SM4EKEY))] -- "TARGET_SIMD && TARGET_SM4" -+ "TARGET_SM4" - "sm4ekey\\t%0.4s, %1.4s, %2.4s" - [(set_attr "type" "crypto_sm4")] - ) -@@ -9230,7 +9230,7 @@ - (unspec:TI [(match_operand:DI 1 "register_operand" "w") - (match_operand:DI 2 "register_operand" "w")] - UNSPEC_PMULL))] -- "TARGET_SIMD && TARGET_AES" -+ "TARGET_AES" - "pmull\\t%0.1q, %1.1d, %2.1d" - [(set_attr "type" "crypto_pmull")] - ) -@@ -9240,7 +9240,7 @@ - (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") - (match_operand:V2DI 2 "register_operand" "w")] - UNSPEC_PMULL2))] -- "TARGET_SIMD && TARGET_AES" -+ "TARGET_AES" - "pmull2\\t%0.1q, %1.2d, %2.2d" - [(set_attr "type" "crypto_pmull")] - ) -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 521031efe..2a9d2d031 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -222,19 +222,19 @@ enum class aarch64_feature : unsigned char { - #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) - - /* Crypto is an optional extension to AdvSIMD. */ --#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) -+#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) - - /* SHA2 is an optional extension to AdvSIMD. */ --#define TARGET_SHA2 ((TARGET_SIMD && AARCH64_ISA_SHA2) || TARGET_CRYPTO) -+#define TARGET_SHA2 (AARCH64_ISA_SHA2) - - /* SHA3 is an optional extension to AdvSIMD. */ --#define TARGET_SHA3 (TARGET_SIMD && AARCH64_ISA_SHA3) -+#define TARGET_SHA3 (AARCH64_ISA_SHA3) - - /* AES is an optional extension to AdvSIMD. */ --#define TARGET_AES ((TARGET_SIMD && AARCH64_ISA_AES) || TARGET_CRYPTO) -+#define TARGET_AES (AARCH64_ISA_AES) - - /* SM is an optional extension to AdvSIMD. */ --#define TARGET_SM4 (TARGET_SIMD && AARCH64_ISA_SM4) -+#define TARGET_SM4 (AARCH64_ISA_SM4) - - /* FP16FML is an optional extension to AdvSIMD. 
*/ - #define TARGET_F16FML (TARGET_SIMD && AARCH64_ISA_F16FML && TARGET_FP_F16INST) -@@ -246,29 +246,29 @@ enum class aarch64_feature : unsigned char { - #define TARGET_LSE (AARCH64_ISA_LSE) - - /* ARMv8.2-A FP16 support that can be enabled through the +fp16 extension. */ --#define TARGET_FP_F16INST (TARGET_FLOAT && AARCH64_ISA_F16) -+#define TARGET_FP_F16INST (AARCH64_ISA_F16) - #define TARGET_SIMD_F16INST (TARGET_SIMD && AARCH64_ISA_F16) - - /* Dot Product is an optional extension to AdvSIMD enabled through +dotprod. */ --#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD) -+#define TARGET_DOTPROD (AARCH64_ISA_DOTPROD) - - /* SVE instructions, enabled through +sve. */ - #define TARGET_SVE (AARCH64_ISA_SVE) - - /* SVE2 instructions, enabled through +sve2. */ --#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2) -+#define TARGET_SVE2 (AARCH64_ISA_SVE2) - - /* SVE2 AES instructions, enabled through +sve2-aes. */ --#define TARGET_SVE2_AES (TARGET_SVE2 && AARCH64_ISA_SVE2_AES) -+#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES) - - /* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */ --#define TARGET_SVE2_BITPERM (TARGET_SVE2 && AARCH64_ISA_SVE2_BITPERM) -+#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM) - - /* SVE2 SHA3 instructions, enabled through +sve2-sha3. */ --#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3) -+#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3) - - /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ --#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4) -+#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) - - /* ARMv8.3-A features. */ - #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) -@@ -296,12 +296,10 @@ enum class aarch64_feature : unsigned char { - #define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM) - - /* F32MM instructions are enabled through +f32mm. 
*/ --#define TARGET_F32MM (AARCH64_ISA_F32MM) --#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM) -+#define TARGET_SVE_F32MM (AARCH64_ISA_F32MM) - - /* F64MM instructions are enabled through +f64mm. */ --#define TARGET_F64MM (AARCH64_ISA_F64MM) --#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM) -+#define TARGET_SVE_F64MM (AARCH64_ISA_F64MM) - - /* BF16 instructions are enabled through +bf16. */ - #define TARGET_BF16_FP (AARCH64_ISA_BF16) -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index c0c64a798..7ee26284d 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -6417,7 +6417,7 @@ - (define_expand "dihf2" - [(set (match_operand:HF 0 "register_operand") - (FLOATUORS:HF (match_operand:DI 1 "register_operand")))] -- "TARGET_FLOAT && (TARGET_FP_F16INST || TARGET_SIMD)" -+ "TARGET_FP_F16INST || TARGET_SIMD" - { - if (TARGET_FP_F16INST) - emit_insn (gen_aarch64_fp16_dihf2 (operands[0], operands[1])); -@@ -6676,7 +6676,7 @@ - [(match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "register_operand")] -- "TARGET_FLOAT && TARGET_SIMD" -+ "TARGET_SIMD" - { - rtx bitmask = gen_reg_rtx (mode); - emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U -@@ -6693,7 +6693,7 @@ - (match_operand:GPF 2 "register_operand" "w,w,0,0") - (match_operand: 3 "register_operand" "0,w,w,X")] - UNSPEC_COPYSIGN))] -- "TARGET_FLOAT && TARGET_SIMD" -+ "TARGET_SIMD" - "@ - bsl\\t%0., %2., %1. - bit\\t%0., %2., %3. 
-@@ -6714,7 +6714,7 @@ - [(match_operand:GPF 0 "register_operand") - (match_operand:GPF 1 "register_operand") - (match_operand:GPF 2 "register_operand")] -- "TARGET_FLOAT && TARGET_SIMD" -+ "TARGET_SIMD" - { - - machine_mode imode = mode; --- -2.33.0 - diff --git a/0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch b/0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch deleted file mode 100644 index 8fe079c..0000000 --- a/0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 4 Oct 2022 16:39:18 +0100 -Subject: [PATCH 027/157] [Backport][SME] aarch64: Define __ARM_FEATURE_RCPC - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630 - -https://github.com/ARM-software/acle/pull/199 adds a new feature -macro for RCPC, for use in things like inline assembly. This patch -adds the associated support to GCC. - -Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a -entry didn't include it. This was probably harmless in practice -since GCC simply ignored the extension until now. (The GAS -definition is OK.) - -gcc/ - * config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro. - * config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC. - * config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1) - (neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores. - * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define - __ARM_FEATURE_RCPC when appropriate. - -gcc/testsuite/ - * gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests. 
---- - gcc/config/aarch64/aarch64-arches.def | 2 +- - gcc/config/aarch64/aarch64-c.cc | 1 + - gcc/config/aarch64/aarch64-cores.def | 10 +++++----- - gcc/config/aarch64/aarch64.h | 1 + - .../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++ - 5 files changed, 28 insertions(+), 6 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index 9f8246618..5a9eff336 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -33,7 +33,7 @@ - AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD)) - AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA)) - AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A)) --AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH)) -+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC)) - AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM)) - AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES)) - AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16)) -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 2dfe2b8f8..4085ad840 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); - aarch64_def_or_undef (TARGET_LS64, - "__ARM_FEATURE_LS64", pfile); -+ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); - - /* Not for ACLE, but required to keep "float.h" correct if we switch - target between implementations that do or do not support ARMv8.2-A -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 60299160b..b50628d6b 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0 - /* 
ARMv8.3-A Architecture Processors. */ - - /* Marvell cores (TX3). */ --AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) -+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a) - - /* ARMv8.4-A Architecture Processors. */ - - /* Arm ('A') cores. */ --AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) --AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) -+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1) -+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1) - - /* Qualcomm ('Q') cores. */ --AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1) -+AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1) - - /* ARMv8-A big.LITTLE implementations. 
*/ - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 2a9d2d031..19b82b4f3 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char { - #define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4) - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) -+#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC) - #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A) - #define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) - #define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A) -diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -index bfb044f5d..307fa3d67 100644 ---- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c -@@ -248,6 +248,26 @@ - #error "__ARM_FEATURE_CRC32 is not defined but should be!" - #endif - -+#pragma GCC target ("arch=armv8.2-a") -+#ifdef __ARM_FEATURE_RCPC -+#error "__ARM_FEATURE_RCPC is defined but should not be!" -+#endif -+ -+#pragma GCC target ("arch=armv8.2-a+rcpc") -+#ifndef __ARM_FEATURE_RCPC -+#error "__ARM_FEATURE_RCPC is not defined but should be!" -+#endif -+ -+#pragma GCC target ("+norcpc") -+#ifdef __ARM_FEATURE_RCPC -+#error "__ARM_FEATURE_RCPC is defined but should not be!" -+#endif -+ -+#pragma GCC target ("arch=armv8.3-a") -+#ifndef __ARM_FEATURE_RCPC -+#error "__ARM_FEATURE_RCPC is not defined but should be!" 
-+#endif -+ - int - foo (int a) - { --- -2.33.0 - diff --git a/0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch b/0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch deleted file mode 100644 index d317791..0000000 --- a/0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Tue, 20 Feb 2024 11:03:47 +0800 -Subject: [PATCH 028/157] [Backport][SME] Add Ampere-1 and Ampere-1A core - definition in aarch64-cores.def - -From commit db2f5d661239737157cf131de7d4df1c17d8d88d and -590a06afbf0e96813b5879742f38f3665512c854 ---- - gcc/config/aarch64/aarch64-cores.def | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index b50628d6b..f069c81cf 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu - AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1) - - /* Ampere Computing ('\xC0') cores. */ --AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1) -+AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1) - /* Do not swap around "emag" and "xgene1", - this order is required to handle variant correctly. 
*/ - AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3) --- -2.33.0 - diff --git a/0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch b/0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch deleted file mode 100644 index 9ad166c..0000000 --- a/0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch +++ /dev/null @@ -1,968 +0,0 @@ -From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Wed, 7 Sep 2022 10:52:04 +0100 -Subject: [PATCH 029/157] [Backport][SME] aarch64: Fix +nosimd handling of FPR - moves - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6 - -8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other -moves would handle FPR<-zero inefficiently. This is very much a -niche case at the moment, but something like it becomes more -important with SME streaming mode. - -The si, di and vector tests already passed, they're just included for -completeness. - -We're a bit inconsistent about whether alternatives involving FPRs -are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads -and stores are sometimes * and sometimes fp. - -IMO * makes more sense. FPRs should not be used at all without -TARGET_FLOAT, so TARGET_FLOAT represents the base architecture -when FPRs are enabled. I think it's more useful if non-default -arches represent a genuine restriction. - -gcc/ - * config/aarch64/aarch64.md (*mov_aarch64): Extend - w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves - in that case. Extend w<-r to w<-Z. - (*mov_aarch64): Likewise, but with Y instead of Z. - (*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not - available. - (define_split): Do not apply the floating-point immediate-to-register - split to zeros, even if MOVI is not available. - -gcc/testsuite/ - * gcc.target/aarch64/movqi_1.c: New test. - * gcc.target/aarch64/movhi_1.c: Likewise. 
- * gcc.target/aarch64/movsi_1.c: Likewise. - * gcc.target/aarch64/movdi_2.c: Likewise. - * gcc.target/aarch64/movti_2.c: Likewise. - * gcc.target/aarch64/movhf_1.c: Likewise. - * gcc.target/aarch64/movsf_1.c: Likewise. - * gcc.target/aarch64/movdf_1.c: Likewise. - * gcc.target/aarch64/movtf_2.c: Likewise. - * gcc.target/aarch64/movv8qi_1.c: Likewise. - * gcc.target/aarch64/movv16qi_1.c: Likewise. ---- - gcc/config/aarch64/aarch64.md | 38 ++++---- - gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++ - gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++ - gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++ - gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++ - 12 files changed, 729 insertions(+), 16 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c - -diff --git a/gcc/config/aarch64/aarch64.md 
b/gcc/config/aarch64/aarch64.md -index 7ee26284d..7267a74d6 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1201,7 +1201,7 @@ - - (define_insn "*mov_aarch64" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") -- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m,m,rZ,w,w,r,w"))] -+ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m,m,rZ,w,w,rZ,w"))] - "(register_operand (operands[0], mode) - || aarch64_reg_or_zero (operands[1], mode))" - { -@@ -1225,11 +1225,11 @@ - case 7: - return "str\t%1, %0"; - case 8: -- return "umov\t%w0, %1.[0]"; -+ return TARGET_SIMD ? "umov\t%w0, %1.[0]" : "fmov\t%w0, %s1"; - case 9: -- return "dup\t%0., %w1"; -+ return TARGET_SIMD ? "dup\t%0., %w1" : "fmov\t%s0, %w1"; - case 10: -- return "dup\t%0, %1.[0]"; -+ return TARGET_SIMD ? "dup\t%0, %1.[0]" : "fmov\t%s0, %s1"; - default: - gcc_unreachable (); - } -@@ -1237,7 +1237,7 @@ - ;; The "mov_imm" type for CNT is just a placeholder. 
- [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, - store_4,neon_to_gp,neon_from_gp,neon_dup") -- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")] -+ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")] - ) - - (define_expand "mov" -@@ -1399,14 +1399,15 @@ - - (define_insn "*movti_aarch64" - [(set (match_operand:TI 0 -- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") -+ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") - (match_operand:TI 1 -- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] -+ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))] - "(register_operand (operands[0], TImode) - || aarch64_reg_or_zero (operands[1], TImode))" - "@ - # - movi\\t%0.2d, #0 -+ fmov\t%d0, xzr - # - # - mov\\t%0.16b, %1.16b -@@ -1415,11 +1416,11 @@ - stp\\txzr, xzr, %0 - ldr\\t%q0, %1 - str\\t%q1, %0" -- [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ -+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ - load_16,store_16,store_16,\ - load_16,store_16") -- (set_attr "length" "8,4,8,8,4,4,4,4,4,4") -- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] -+ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") -+ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")] - ) - - ;; Split a TImode register-register or register-immediate move into -@@ -1458,16 +1459,19 @@ - ) - - (define_insn "*mov_aarch64" -- [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") -- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] -+ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r") -+ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], mode) - || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - movi\\t%0.4h, #0 - fmov\\t%h0, %w1 - dup\\t%w0.4h, %w1 -+ fmov\\t%s0, %w1 - umov\\t%w0, %1.h[0] -+ fmov\\t%w0, %s1 - mov\\t%0.h[0], %1.h[0] -+ fmov\\t%s0, 
%s1 - fmov\\t%h0, %1 - * return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode); - ldr\\t%h0, %1 -@@ -1475,9 +1479,10 @@ - ldrh\\t%w0, %1 - strh\\t%w1, %0 - mov\\t%w0, %w1" -- [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ -- neon_move,f_loads,f_stores,load_4,store_4,mov_reg") -- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")] -+ [(set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc, -+ neon_move,fmov,fconsts,neon_move,f_loads,f_stores, -+ load_4,store_4,mov_reg") -+ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")] - ) - - (define_insn "*movsf_aarch64" -@@ -1530,10 +1535,11 @@ - - (define_split - [(set (match_operand:GPF_HF 0 "nonimmediate_operand") -- (match_operand:GPF_HF 1 "general_operand"))] -+ (match_operand:GPF_HF 1 "const_double_operand"))] - "can_create_pseudo_p () - && !aarch64_can_const_movi_rtx_p (operands[1], mode) - && !aarch64_float_const_representable_p (operands[1]) -+ && !aarch64_float_const_zero_rtx_p (operands[1]) - && aarch64_float_const_rtx_p (operands[1])" - [(const_int 0)] - { -diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c -new file mode 100644 -index 000000000..a51ded1d6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+double -+fpr_to_fpr (double q0, double q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+double -+gpr_to_fpr () -+{ -+ register double x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+double -+zero_to_fpr () -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov x0, d0 -+** ret -+*/ -+void -+fpr_to_gpr 
(double q0) -+{ -+ register double x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_2.c b/gcc/testsuite/gcc.target/aarch64/movdi_2.c -new file mode 100644 -index 000000000..dd3fc3e8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movdi_2.c -@@ -0,0 +1,61 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+void -+fpr_to_fpr (void) -+{ -+ register uint64_t q0 asm ("q0"); -+ register uint64_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+void -+gpr_to_fpr (uint64_t x0) -+{ -+ register uint64_t q0 asm ("q0"); -+ q0 = x0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+void -+zero_to_fpr () -+{ -+ register uint64_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov x0, d0 -+** ret -+*/ -+uint64_t -+fpr_to_gpr () -+{ -+ register uint64_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_1.c b/gcc/testsuite/gcc.target/aarch64/movhf_1.c -new file mode 100644 -index 000000000..cae25d4e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movhf_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+_Float16 -+fpr_to_fpr (_Float16 q0, _Float16 q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+_Float16 -+gpr_to_fpr () -+{ -+ register _Float16 w0 asm ("w0"); -+ asm volatile ("" : "=r" (w0)); -+ return w0; -+} 
-+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+_Float16 -+zero_to_fpr () -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+void -+fpr_to_gpr (_Float16 q0) -+{ -+ register _Float16 w0 asm ("w0"); -+ w0 = q0; -+ asm volatile ("" :: "r" (w0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_1.c b/gcc/testsuite/gcc.target/aarch64/movhi_1.c -new file mode 100644 -index 000000000..8017abc5f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movhi_1.c -@@ -0,0 +1,61 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) -+{ -+ register uint16_t q0 asm ("q0"); -+ register uint16_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint16_t w0) -+{ -+ register uint16_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () -+{ -+ register uint16_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+uint16_t -+fpr_to_gpr () -+{ -+ register uint16_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_1.c b/gcc/testsuite/gcc.target/aarch64/movqi_1.c -new file mode 100644 -index 000000000..401a79630 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movqi_1.c -@@ -0,0 +1,61 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) -+{ -+ register 
uint8_t q0 asm ("q0"); -+ register uint8_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint8_t w0) -+{ -+ register uint8_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () -+{ -+ register uint8_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+uint8_t -+fpr_to_gpr () -+{ -+ register uint8_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_1.c b/gcc/testsuite/gcc.target/aarch64/movsf_1.c -new file mode 100644 -index 000000000..09715aa4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movsf_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+float -+fpr_to_fpr (float q0, float q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+float -+gpr_to_fpr () -+{ -+ register float w0 asm ("w0"); -+ asm volatile ("" : "=r" (w0)); -+ return w0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+float -+zero_to_fpr () -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+void -+fpr_to_gpr (float q0) -+{ -+ register float w0 asm ("w0"); -+ w0 = q0; -+ asm volatile ("" :: "r" (w0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_1.c b/gcc/testsuite/gcc.target/aarch64/movsi_1.c -new file mode 100644 -index 000000000..5314139aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movsi_1.c -@@ -0,0 +1,61 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ 
-+#pragma GCC target "+nothing+nosimd+fp" -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) -+{ -+ register uint32_t q0 asm ("q0"); -+ register uint32_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint32_t w0) -+{ -+ register uint32_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () -+{ -+ register uint32_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+uint32_t -+fpr_to_gpr () -+{ -+ register uint32_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_2.c b/gcc/testsuite/gcc.target/aarch64/movtf_2.c -new file mode 100644 -index 000000000..38b16358d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movtf_2.c -@@ -0,0 +1,81 @@ -+/* { dg-do assemble } */ -+/* { dg-require-effective-target large_long_double } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+long double -+fpr_to_fpr (long double q0, long double q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+long double -+gpr_to_fpr () -+{ -+ register long double x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+long double -+zero_to_fpr () -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: { 
target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (long double q0) -+{ -+ register long double x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movti_2.c b/gcc/testsuite/gcc.target/aarch64/movti_2.c -new file mode 100644 -index 000000000..c393b1220 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movti_2.c -@@ -0,0 +1,86 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+void -+fpr_to_fpr (void) -+{ -+ register __int128_t q0 asm ("q0"); -+ register __int128_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+void -+gpr_to_fpr (__int128_t x0) -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = x0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+void -+zero_to_fpr () -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov 
x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+__int128_t -+fpr_to_gpr () -+{ -+ register __int128_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c -new file mode 100644 -index 000000000..8a6afb13b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c -@@ -0,0 +1,82 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+typedef unsigned char v16qi __attribute__((vector_size(16))); -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+v16qi -+fpr_to_fpr (v16qi q0, v16qi q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+v16qi -+gpr_to_fpr () -+{ -+ register v16qi x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v16qi -+zero_to_fpr () -+{ -+ return (v16qi) {}; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (v16qi q0) -+{ -+ register v16qi x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c -new file mode 100644 -index 000000000..4c97e6fbc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c -@@ 
-0,0 +1,55 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ -+typedef unsigned char v8qi __attribute__((vector_size(8))); -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+v8qi -+fpr_to_fpr (v8qi q0, v8qi q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+v8qi -+gpr_to_fpr () -+{ -+ register v8qi x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v8qi -+zero_to_fpr () -+{ -+ return (v8qi) {}; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov x0, d0 -+** ret -+*/ -+void -+fpr_to_gpr (v8qi q0) -+{ -+ register v8qi x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} --- -2.33.0 - diff --git a/0129-Backport-SME-aarch64-Commonise-some-folding-code.patch b/0129-Backport-SME-aarch64-Commonise-some-folding-code.patch deleted file mode 100644 index 01fb18d..0000000 --- a/0129-Backport-SME-aarch64-Commonise-some-folding-code.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 20 Oct 2022 10:37:35 +0100 -Subject: [PATCH 030/157] [Backport][SME] aarch64: Commonise some folding code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c - -Add an aarch64_sve::gimple_folder helper for folding calls -to integer constants. SME will make more use of this. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (gimple_folder::fold_to_cstu): New member function. - * config/aarch64/aarch64-sve-builtins.cc - (gimple_folder::fold_to_cstu): Define. - * config/aarch64/aarch64-sve-builtins-base.cc - (svcnt_bhwd_impl::fold): Use it. 
---- - gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++------- - gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++ - gcc/config/aarch64/aarch64-sve-builtins.h | 1 + - 3 files changed, 10 insertions(+), 7 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -index c24c05487..56c9d75e7 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -@@ -516,9 +516,7 @@ public: - gimple * - fold (gimple_folder &f) const OVERRIDE - { -- tree count = build_int_cstu (TREE_TYPE (f.lhs), -- GET_MODE_NUNITS (m_ref_mode)); -- return gimple_build_assign (f.lhs, count); -+ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); - } - - rtx -@@ -553,10 +551,7 @@ public: - unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); - HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); - if (value >= 0) -- { -- tree count = build_int_cstu (TREE_TYPE (f.lhs), value); -- return gimple_build_assign (f.lhs, count); -- } -+ return f.fold_to_cstu (value); - - return NULL; - } -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index a70e3a6b4..e168c8334 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance) - return call; - } - -+/* Fold the call to constant VAL. */ -+gimple * -+gimple_folder::fold_to_cstu (poly_uint64 val) -+{ -+ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val)); -+} -+ - /* Fold the call to a PTRUE, taking the element size from type suffix 0. 
*/ - gimple * - gimple_folder::fold_to_ptrue () -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 63d1db776..0d130b871 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -500,6 +500,7 @@ public: - tree load_store_cookie (tree); - - gimple *redirect_call (const function_instance &); -+ gimple *fold_to_cstu (poly_uint64); - gimple *fold_to_pfalse (); - gimple *fold_to_ptrue (); - gimple *fold_to_vl_pred (unsigned int); --- -2.33.0 - diff --git a/0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch b/0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch deleted file mode 100644 index bea41d9..0000000 --- a/0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 7 Nov 2023 15:22:57 +0000 -Subject: [PATCH 031/157] [Backport][SME] aarch64: Add a %Z operand modifier - for SVE registers - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a - -This patch adds a %Z operand modifier that prints registers as SVE z -registers. The SME patches need this, but so do Tamar's patches. -I'm separating this out to unblock those. - -We should probably document the [wxbhsdqZ] modifiers as -user-facing, but doing that for all of them is a separate patch. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z - modifier for SVE registers. ---- - gcc/config/aarch64/aarch64.cc | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 3e83e48ec..fd1114b52 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -11901,6 +11901,10 @@ sizetochar (int size) - 'N': Take the duplicated element in a vector constant - and print the negative of it in decimal. 
- 'b/h/s/d/q': Print a scalar FP/SIMD register name. -+ 'Z': Same for SVE registers. ('z' was already taken.) -+ Note that it is not necessary to use %Z for operands -+ that have SVE modes. The convention is to use %Z -+ only for non-SVE (or potentially non-SVE) modes. - 'S/T/U/V': Print a FP/SIMD register name for a register list. - The register printed is the FP/SIMD register name - of X + 0/1/2/3 for S/T/U/V. -@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code) - case 's': - case 'd': - case 'q': -+ case 'Z': -+ code = TOLOWER (code); - if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) - { - output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); --- -2.33.0 - diff --git a/0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch b/0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch deleted file mode 100644 index 15c8418..0000000 --- a/0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sun, 5 Nov 2023 18:28:46 +0000 -Subject: [PATCH 032/157] [Backport][SME] mode-switching: Remove unused bbnum - field - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133 - -seginfo had an unused bbnum field, presumably dating from before -BB information was attached directly to insns. - -gcc/ - * mode-switching.cc: Remove unused forward references. - (seginfo): Remove bbnum. - (new_seginfo): Remove associated argument. - (optimize_mode_switching): Update calls accordingly. ---- - gcc/mode-switching.cc | 18 +++++------------- - 1 file changed, 5 insertions(+), 13 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 6e3f1dc65..4cf8f03a0 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. 
If not see - MODE is the mode this insn must be executed in. - INSN_PTR is the insn to be executed (may be the note that marks the - beginning of a basic block). -- BBNUM is the flow graph basic block this insn occurs in. - NEXT is the next insn in the same basic block. */ - struct seginfo - { - int mode; - rtx_insn *insn_ptr; -- int bbnum; - struct seginfo *next; - HARD_REG_SET regs_live; - }; -@@ -84,11 +82,6 @@ struct bb_info - int mode_in; - }; - --static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET); --static void add_seginfo (struct bb_info *, struct seginfo *); --static void reg_dies (rtx, HARD_REG_SET *); --static void reg_becomes_live (rtx, const_rtx, void *); -- - /* Clear ode I from entity J in bitmap B. */ - #define clear_mode_bit(b, j, i) \ - bitmap_clear_bit (b, (j * max_num_modes) + i) -@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) - } - - /* Allocate a new BBINFO structure, initialized with the MODE, INSN, -- and basic block BB parameters. -+ and REGS_LIVE parameters. - INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty - basic block; that allows us later to insert instructions in a FIFO-like - manner. 
*/ - - static struct seginfo * --new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) -+new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live) - { - struct seginfo *ptr; - -@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) - ptr = XNEW (struct seginfo); - ptr->mode = mode; - ptr->insn_ptr = insn; -- ptr->bbnum = bb; - ptr->next = NULL; - ptr->regs_live = regs_live; - return ptr; -@@ -604,7 +596,7 @@ optimize_mode_switching (void) - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); - if (ins_pos != BB_END (bb)) - ins_pos = NEXT_INSN (ins_pos); -- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now); -+ ptr = new_seginfo (no_mode, ins_pos, live_now); - add_seginfo (info + bb->index, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); -@@ -622,7 +614,7 @@ optimize_mode_switching (void) - { - any_set_required = true; - last_mode = mode; -- ptr = new_seginfo (mode, insn, bb->index, live_now); -+ ptr = new_seginfo (mode, insn, live_now); - add_seginfo (info + bb->index, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); -@@ -651,7 +643,7 @@ optimize_mode_switching (void) - mark the block as nontransparent. 
*/ - if (!any_set_required) - { -- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now); -+ ptr = new_seginfo (no_mode, BB_END (bb), live_now); - add_seginfo (info + bb->index, ptr); - if (last_mode != no_mode) - for (i = 0; i < no_mode; i++) --- -2.33.0 - diff --git a/0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch b/0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch deleted file mode 100644 index e3c0ff7..0000000 --- a/0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch +++ /dev/null @@ -1,311 +0,0 @@ -From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:54 +0000 -Subject: [PATCH 033/157] [Backport][SME] mode-switching: Tweak the macro/hook - documentation - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d - -I found the documentation for the mode-switching macros/hooks -a bit hard to follow at first. This patch tries to add the -information that I think would have made it easier to understand. - -Of course, documentation preferences are personal, and so I could -be changing something that others understood to something that -seems impenetrable. - -Some notes on specific changes: - -- "in an optimizing compilation" didn't seem accurate; the pass - is run even at -O0, and often needs to be for correctness. - -- "at run time" meant when the compiler was run, rather than when - the compiled code was run. - -- Removing the list of optional macros isn't a clarification, - but it means that upcoming patches don't create an absurdly - long list. - -- I don't really understand the purpose of TARGET_MODE_PRIORITY, - so I mostly left that alone. - -gcc/ - * target.def: Tweak documentation of mode-switching hooks. - * doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation. - (NUM_MODES_FOR_MODE_SWITCHING): Likewise. - * doc/tm.texi: Regenerate. 
---- - gcc/doc/tm.texi | 69 ++++++++++++++++++++++++++++------------------ - gcc/doc/tm.texi.in | 26 +++++++++-------- - gcc/target.def | 43 ++++++++++++++++++----------- - 3 files changed, 84 insertions(+), 54 deletions(-) - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 851d31c18..553aa4cf2 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations: - - @defmac OPTIMIZE_MODE_SWITCHING (@var{entity}) - Define this macro if the port needs extra instructions inserted for mode --switching in an optimizing compilation. -+switching. - - For an example, the SH4 can perform both single and double precision - floating point operations, but to perform a single precision operation, -@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to - be inserted before reload, i.e.@: you cannot put this into instruction emitting - or @code{TARGET_MACHINE_DEPENDENT_REORG}. - --You can have multiple entities that are mode-switched, and select at run time --which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should --return nonzero for any @var{entity} that needs mode-switching. -+You can have multiple entities that are mode-switched, some of which might -+only be needed conditionally. The entities are identified by their index -+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length -+of the initializer determining the number of entities. -+ -+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity} -+that needs mode-switching. -+ - If you define this macro, you also have to define - @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED}, - @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}. --@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT} --are optional. -+The other macros in this section are optional. 
- @end defmac - - @defmac NUM_MODES_FOR_MODE_SWITCHING - If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as - initializer for an array of integers. Each initializer element - N refers to an entity that needs mode switching, and specifies the number --of different modes that might need to be set for this entity. --The position of the initializer in the initializer---starting counting at -+of different modes that are defined for that entity. -+The position of the element in the initializer---starting counting at - zero---determines the integer that is used to refer to the mode-switched - entity in question. --In macros that take mode arguments / yield a mode result, modes are --represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode --switch is needed / supplied. -+Modes are represented as numbers 0 @dots{} N @minus{} 1. -+In mode arguments and return values, N either represents an unknown -+mode or ``no mode'', depending on context. - @end defmac - - @deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live}) - Generate one or more insns to set @var{entity} to @var{mode}. - @var{hard_reg_live} is the set of hard registers live at the point where - the insn(s) are to be inserted. @var{prev_moxde} indicates the mode --to switch from. Sets of a lower numbered entity will be emitted before -+to switch from, or is the number of modes if the previous mode is not -+known. Sets of a lower numbered entity will be emitted before - sets of a higher numbered entity to a mode of the same or lower priority. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) - @var{entity} is an integer specifying a mode-switched entity. 
--If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro --to return an integer value not larger than the corresponding element --in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity} --must be switched into prior to the execution of @var{insn}. -+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook -+to return the mode that @var{entity} must be switched into prior to the -+execution of @var{insn}, or the number of modes if @var{insn} has no -+such requirement. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) - @var{entity} is an integer specifying a mode-switched entity. --If this macro is defined, it is evaluated for every @var{insn} during mode --switching. It determines the mode that an insn results --in (if different from the incoming mode). -+If this hook is defined, it is evaluated for every @var{insn} during mode -+switching. It returns the mode that @var{entity} is in after @var{insn} -+has been executed. @var{mode} is the mode that @var{entity} was in -+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. -+ -+@var{mode} is equal to the number of modes defined for @var{entity} -+if the mode before @var{insn} is unknown. The hook should likewise return -+the number of modes if it does not know what mode @var{entity} has after -+@var{insn}. -+ -+Not defining the hook is equivalent to returning @var{mode}. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) --If this macro is defined, it is evaluated for every @var{entity} that --needs mode switching. It should evaluate to an integer, which is a mode --that @var{entity} is assumed to be switched to at function entry. -+If this hook is defined, it is evaluated for every @var{entity} that -+needs mode switching. 
It should return the mode that @var{entity} is -+guaranteed to be in on entry to the function, or the number of modes -+if there is no such guarantee. - If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT} - must be defined. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity}) --If this macro is defined, it is evaluated for every @var{entity} that --needs mode switching. It should evaluate to an integer, which is a mode --that @var{entity} is assumed to be switched to at function exit. -+If this hook is defined, it is evaluated for every @var{entity} that -+needs mode switching. It should return the mode that @var{entity} must -+be in on return from the function, or the number of modes if there is no -+such requirement. - If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY} - must be defined. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n}) --This macro specifies the order in which modes for @var{entity} -+This hook specifies the order in which modes for @var{entity} - are processed. 0 is the highest priority, - @code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest. --The value of the macro should be an integer designating a mode -+The hook returns an integer designating a mode - for @var{entity}. For any fixed @var{entity}, @code{mode_priority} - (@var{entity}, @var{n}) shall be a bijection in 0 @dots{} - @code{num_modes_for_mode_switching[@var{entity}] - 1}. -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index ac95cdf7a..9ec11b15c 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations: - - @defmac OPTIMIZE_MODE_SWITCHING (@var{entity}) - Define this macro if the port needs extra instructions inserted for mode --switching in an optimizing compilation. -+switching. 
- - For an example, the SH4 can perform both single and double precision - floating point operations, but to perform a single precision operation, -@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to - be inserted before reload, i.e.@: you cannot put this into instruction emitting - or @code{TARGET_MACHINE_DEPENDENT_REORG}. - --You can have multiple entities that are mode-switched, and select at run time --which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should --return nonzero for any @var{entity} that needs mode-switching. -+You can have multiple entities that are mode-switched, some of which might -+only be needed conditionally. The entities are identified by their index -+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length -+of the initializer determining the number of entities. -+ -+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity} -+that needs mode-switching. -+ - If you define this macro, you also have to define - @code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED}, - @code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}. --@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT} --are optional. -+The other macros in this section are optional. - @end defmac - - @defmac NUM_MODES_FOR_MODE_SWITCHING - If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as - initializer for an array of integers. Each initializer element - N refers to an entity that needs mode switching, and specifies the number --of different modes that might need to be set for this entity. --The position of the initializer in the initializer---starting counting at -+of different modes that are defined for that entity. -+The position of the element in the initializer---starting counting at - zero---determines the integer that is used to refer to the mode-switched - entity in question. 
--In macros that take mode arguments / yield a mode result, modes are --represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode --switch is needed / supplied. -+Modes are represented as numbers 0 @dots{} N @minus{} 1. -+In mode arguments and return values, N either represents an unknown -+mode or ``no mode'', depending on context. - @end defmac - - @hook TARGET_MODE_EMIT -diff --git a/gcc/target.def b/gcc/target.def -index c9bb2b4c2..b87b0f927 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -6992,51 +6992,62 @@ DEFHOOK - "Generate one or more insns to set @var{entity} to @var{mode}.\n\ - @var{hard_reg_live} is the set of hard registers live at the point where\n\ - the insn(s) are to be inserted. @var{prev_moxde} indicates the mode\n\ --to switch from. Sets of a lower numbered entity will be emitted before\n\ -+to switch from, or is the number of modes if the previous mode is not\n\ -+known. Sets of a lower numbered entity will be emitted before\n\ - sets of a higher numbered entity to a mode of the same or lower priority.", - void, (int entity, int mode, int prev_mode, HARD_REG_SET regs_live), NULL) - - DEFHOOK - (needed, - "@var{entity} is an integer specifying a mode-switched entity.\n\ --If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro\n\ --to return an integer value not larger than the corresponding element\n\ --in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}\n\ --must be switched into prior to the execution of @var{insn}.", -+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ -+to return the mode that @var{entity} must be switched into prior to the\n\ -+execution of @var{insn}, or the number of modes if @var{insn} has no\n\ -+such requirement.", - int, (int entity, rtx_insn *insn), NULL) - - DEFHOOK - (after, - "@var{entity} is an integer specifying a mode-switched entity.\n\ --If this macro is defined, it is evaluated for every @var{insn} during 
mode\n\ --switching. It determines the mode that an insn results\n\ --in (if different from the incoming mode).", -+If this hook is defined, it is evaluated for every @var{insn} during mode\n\ -+switching. It returns the mode that @var{entity} is in after @var{insn}\n\ -+has been executed. @var{mode} is the mode that @var{entity} was in\n\ -+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ -+\n\ -+@var{mode} is equal to the number of modes defined for @var{entity}\n\ -+if the mode before @var{insn} is unknown. The hook should likewise return\n\ -+the number of modes if it does not know what mode @var{entity} has after\n\ -+@var{insn}.\n\ -+\n\ -+Not defining the hook is equivalent to returning @var{mode}.", - int, (int entity, int mode, rtx_insn *insn), NULL) - - DEFHOOK - (entry, -- "If this macro is defined, it is evaluated for every @var{entity} that\n\ --needs mode switching. It should evaluate to an integer, which is a mode\n\ --that @var{entity} is assumed to be switched to at function entry.\n\ -+ "If this hook is defined, it is evaluated for every @var{entity} that\n\ -+needs mode switching. It should return the mode that @var{entity} is\n\ -+guaranteed to be in on entry to the function, or the number of modes\n\ -+if there is no such guarantee.\n\ - If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}\n\ - must be defined.", - int, (int entity), NULL) - - DEFHOOK - (exit, -- "If this macro is defined, it is evaluated for every @var{entity} that\n\ --needs mode switching. It should evaluate to an integer, which is a mode\n\ --that @var{entity} is assumed to be switched to at function exit.\n\ -+ "If this hook is defined, it is evaluated for every @var{entity} that\n\ -+needs mode switching. 
It should return the mode that @var{entity} must\n\ -+be in on return from the function, or the number of modes if there is no\n\ -+such requirement.\n\ - If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\ - must be defined.", - int, (int entity), NULL) - - DEFHOOK - (priority, -- "This macro specifies the order in which modes for @var{entity}\n\ -+ "This hook specifies the order in which modes for @var{entity}\n\ - are processed. 0 is the highest priority,\n\ - @code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.\n\ --The value of the macro should be an integer designating a mode\n\ -+The hook returns an integer designating a mode\n\ - for @var{entity}. For any fixed @var{entity}, @code{mode_priority}\n\ - (@var{entity}, @var{n}) shall be a bijection in 0 @dots{}\n\ - @code{num_modes_for_mode_switching[@var{entity}] - 1}.", --- -2.33.0 - diff --git a/0133-Backport-SME-mode-switching-Add-note-problem.patch b/0133-Backport-SME-mode-switching-Add-note-problem.patch deleted file mode 100644 index 21508da..0000000 --- a/0133-Backport-SME-mode-switching-Add-note-problem.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:55 +0000 -Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b - -optimize_mode_switching uses REG_DEAD notes to track register -liveness, but it failed to tell DF to calculate up-to-date notes. - -Noticed by inspection. I don't have a testcase that fails -because of this. - -gcc/ - * mode-switching.cc (optimize_mode_switching): Call - df_note_add_problem. 
---- - gcc/mode-switching.cc | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 4cf8f03a0..2a9f98793 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -540,6 +540,7 @@ optimize_mode_switching (void) - pre_exit = create_pre_exit (n_entities, entity_map, num_modes); - } - -+ df_note_add_problem (); - df_analyze (); - - /* Create the bitmap vectors. */ --- -2.33.0 - diff --git a/0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch b/0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch deleted file mode 100644 index e74c104..0000000 --- a/0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch +++ /dev/null @@ -1,90 +0,0 @@ -From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:55 +0000 -Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadractic list - operation - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4 - -add_seginfo chained insn information to the end of a list -by starting at the head of the list. This patch avoids the -quadraticness by keeping track of the tail pointer. - -gcc/ - * mode-switching.cc (add_seginfo): Replace head pointer with - a pointer to the tail pointer. - (optimize_mode_switching): Update calls accordingly. ---- - gcc/mode-switching.cc | 24 ++++++++---------------- - 1 file changed, 8 insertions(+), 16 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 2a9f98793..6a13951c9 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live) - } - - /* Add a seginfo element to the end of a list. -- HEAD is a pointer to the list beginning. -+ TAIL is a pointer to the list's null terminator. - INFO is the structure to be linked in. 
*/ - - static void --add_seginfo (struct bb_info *head, struct seginfo *info) -+add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info) - { -- struct seginfo *ptr; -- -- if (head->seginfo == NULL) -- head->seginfo = info; -- else -- { -- ptr = head->seginfo; -- while (ptr->next != NULL) -- ptr = ptr->next; -- ptr->next = info; -- } -+ **tail_ptr = info; -+ *tail_ptr = &info->next; - } - - /* Record in LIVE that register REG died. */ -@@ -573,6 +564,7 @@ optimize_mode_switching (void) - Also compute the initial transparency settings. */ - FOR_EACH_BB_FN (bb, cfun) - { -+ struct seginfo **tail_ptr = &info[bb->index].seginfo; - struct seginfo *ptr; - int last_mode = no_mode; - bool any_set_required = false; -@@ -598,7 +590,7 @@ optimize_mode_switching (void) - if (ins_pos != BB_END (bb)) - ins_pos = NEXT_INSN (ins_pos); - ptr = new_seginfo (no_mode, ins_pos, live_now); -- add_seginfo (info + bb->index, ptr); -+ add_seginfo (&tail_ptr, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); - } -@@ -616,7 +608,7 @@ optimize_mode_switching (void) - any_set_required = true; - last_mode = mode; - ptr = new_seginfo (mode, insn, live_now); -- add_seginfo (info + bb->index, ptr); -+ add_seginfo (&tail_ptr, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); - } -@@ -645,7 +637,7 @@ optimize_mode_switching (void) - if (!any_set_required) - { - ptr = new_seginfo (no_mode, BB_END (bb), live_now); -- add_seginfo (info + bb->index, ptr); -+ add_seginfo (&tail_ptr, ptr); - if (last_mode != no_mode) - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); --- -2.33.0 - diff --git a/0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch b/0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch deleted file mode 100644 index c1b4036..0000000 --- a/0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch +++ /dev/null @@ -1,136 +0,0 @@ -From 
194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:56 +0000 -Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed - to the emit hook - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436 - -optimize_mode_switching passes an entity's current mode (if known) -to the emit hook. However, the mode that it passed ignored the -effect of the after hook. Instead, the mode for the first emit -call in a block was taken from the incoming mode, whereas the -mode for each subsequent emit call was taken from the result -of the previous call. - -The previous pass through the insns already calculated the -correct mode, so this patch records it in the seginfo structure. -(There was a 32-bit hole on 64-bit hosts, so this doesn't increase -the size of the structure for them.) - -gcc/ - * mode-switching.cc (seginfo): Add a prev_mode field. - (new_seginfo): Take and initialize the prev_mode. - (optimize_mode_switching): Update calls accordingly. - Use the recorded modes during the emit phase, rather than - computing one on the fly. ---- - gcc/mode-switching.cc | 30 +++++++++++++++++------------- - 1 file changed, 17 insertions(+), 13 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 6a13951c9..584cd4f67 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see - NEXT is the next insn in the same basic block. */ - struct seginfo - { -+ int prev_mode; - int mode; - rtx_insn *insn_ptr; - struct seginfo *next; -@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) - return need_commit; - } - --/* Allocate a new BBINFO structure, initialized with the MODE, INSN, -- and REGS_LIVE parameters. -+/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE, -+ INSN, and REGS_LIVE parameters. 
- INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty - basic block; that allows us later to insert instructions in a FIFO-like - manner. */ - - static struct seginfo * --new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live) -+new_seginfo (int prev_mode, int mode, rtx_insn *insn, -+ const HARD_REG_SET &regs_live) - { - struct seginfo *ptr; - - gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn) - || insn == BB_END (NOTE_BASIC_BLOCK (insn))); - ptr = XNEW (struct seginfo); -+ ptr->prev_mode = prev_mode; - ptr->mode = mode; - ptr->insn_ptr = insn; - ptr->next = NULL; -@@ -589,7 +592,7 @@ optimize_mode_switching (void) - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); - if (ins_pos != BB_END (bb)) - ins_pos = NEXT_INSN (ins_pos); -- ptr = new_seginfo (no_mode, ins_pos, live_now); -+ ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now); - add_seginfo (&tail_ptr, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); -@@ -605,12 +608,12 @@ optimize_mode_switching (void) - - if (mode != no_mode && mode != last_mode) - { -- any_set_required = true; -- last_mode = mode; -- ptr = new_seginfo (mode, insn, live_now); -+ ptr = new_seginfo (last_mode, mode, insn, live_now); - add_seginfo (&tail_ptr, ptr); - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); -+ any_set_required = true; -+ last_mode = mode; - } - - if (targetm.mode_switching.after) -@@ -636,7 +639,7 @@ optimize_mode_switching (void) - mark the block as nontransparent. 
*/ - if (!any_set_required) - { -- ptr = new_seginfo (no_mode, BB_END (bb), live_now); -+ ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now); - add_seginfo (&tail_ptr, ptr); - if (last_mode != no_mode) - for (i = 0; i < no_mode; i++) -@@ -777,9 +780,9 @@ optimize_mode_switching (void) - FOR_EACH_BB_FN (bb, cfun) - { - struct seginfo *ptr, *next; -- int cur_mode = bb_info[j][bb->index].mode_in; -+ struct seginfo *first = bb_info[j][bb->index].seginfo; - -- for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next) -+ for (ptr = first; ptr; ptr = next) - { - next = ptr->next; - if (ptr->mode != no_mode) -@@ -789,14 +792,15 @@ optimize_mode_switching (void) - rtl_profile_for_bb (bb); - start_sequence (); - -+ int cur_mode = (ptr == first && ptr->prev_mode == no_mode -+ ? bb_info[j][bb->index].mode_in -+ : ptr->prev_mode); -+ - targetm.mode_switching.emit (entity_map[j], ptr->mode, - cur_mode, ptr->regs_live); - mode_set = get_insns (); - end_sequence (); - -- /* modes kill each other inside a basic block. */ -- cur_mode = ptr->mode; -- - /* Insert MODE_SET only if it is nonempty. */ - if (mode_set != NULL_RTX) - { --- -2.33.0 - diff --git a/0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch b/0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch deleted file mode 100644 index 1b99d67..0000000 --- a/0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch +++ /dev/null @@ -1,103 +0,0 @@ -From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:56 +0000 -Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of - transparency - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02 - -For a given block, an entity is either transparent for -all modes or for none. 
Each update to the transparency set -therefore used a loop like: - - for (i = 0; i < no_mode; i++) - clear_mode_bit (transp[bb->index], j, i); - -This patch instead starts out with a bit-per-block bitmap -and updates the main bitmap at the end. - -This isn't much of a simplification on its own. The main -purpose is to simplify later patches. - -gcc/ - * mode-switching.cc (optimize_mode_switching): Initially - compute transparency in a bit-per-block bitmap. ---- - gcc/mode-switching.cc | 19 +++++++++++-------- - 1 file changed, 11 insertions(+), 8 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 584cd4f67..4d2b9e284 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -555,6 +555,8 @@ optimize_mode_switching (void) - bitmap_vector_clear (antic, last_basic_block_for_fn (cfun)); - bitmap_vector_clear (comp, last_basic_block_for_fn (cfun)); - -+ auto_sbitmap transp_all (last_basic_block_for_fn (cfun)); -+ - for (j = n_entities - 1; j >= 0; j--) - { - int e = entity_map[j]; -@@ -562,6 +564,8 @@ optimize_mode_switching (void) - struct bb_info *info = bb_info[j]; - rtx_insn *insn; - -+ bitmap_ones (transp_all); -+ - /* Determine what the first use (if any) need for a mode of entity E is. - This will be the mode that is anticipatable for this block. - Also compute the initial transparency settings. 
*/ -@@ -594,8 +598,7 @@ optimize_mode_switching (void) - ins_pos = NEXT_INSN (ins_pos); - ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now); - add_seginfo (&tail_ptr, ptr); -- for (i = 0; i < no_mode; i++) -- clear_mode_bit (transp[bb->index], j, i); -+ bitmap_clear_bit (transp_all, bb->index); - } - } - -@@ -610,8 +613,7 @@ optimize_mode_switching (void) - { - ptr = new_seginfo (last_mode, mode, insn, live_now); - add_seginfo (&tail_ptr, ptr); -- for (i = 0; i < no_mode; i++) -- clear_mode_bit (transp[bb->index], j, i); -+ bitmap_clear_bit (transp_all, bb->index); - any_set_required = true; - last_mode = mode; - } -@@ -642,8 +644,7 @@ optimize_mode_switching (void) - ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now); - add_seginfo (&tail_ptr, ptr); - if (last_mode != no_mode) -- for (i = 0; i < no_mode; i++) -- clear_mode_bit (transp[bb->index], j, i); -+ bitmap_clear_bit (transp_all, bb->index); - } - } - if (targetm.mode_switching.entry && targetm.mode_switching.exit) -@@ -666,8 +667,7 @@ optimize_mode_switching (void) - an extra check in make_preds_opaque. We also - need this to avoid confusing pre_edge_lcm when - antic is cleared but transp and comp are set. */ -- for (i = 0; i < no_mode; i++) -- clear_mode_bit (transp[bb->index], j, i); -+ bitmap_clear_bit (transp_all, bb->index); - - /* Insert a fake computing definition of MODE into entry - blocks which compute no mode. 
This represents the mode on -@@ -687,6 +687,9 @@ optimize_mode_switching (void) - - FOR_EACH_BB_FN (bb, cfun) - { -+ if (!bitmap_bit_p (transp_all, bb->index)) -+ clear_mode_bit (transp[bb->index], j, m); -+ - if (info[bb->index].seginfo->mode == m) - set_mode_bit (antic[bb->index], j, m); - --- -2.33.0 - diff --git a/0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch b/0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch deleted file mode 100644 index 8444847..0000000 --- a/0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch +++ /dev/null @@ -1,92 +0,0 @@ -From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:57 +0000 -Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit - handling - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e - -An entity isn't transparent in a block that requires a specific mode. -optimize_mode_switching took that into account for normal insns, -but didn't for the exit block. Later patches misbehaved because -of this. - -In contrast, an entity was correctly marked as non-transparent -in the entry block, but the reasoning seemed a bit convoluted. -It also referred to a function that no longer exists. -Since KILL = ~TRANSP, the entity is by definition not transparent -in a block that defines the entity, so I think we can make it so -without comment. - -Finally, the exit handling was nested in the entry handling, -but that doesn't seem necessary. A target could say that an -entity is undefined on entry but must be defined on return, -on a "be liberal in what you accept, be conservative in what -you do" principle. - -gcc/ - * mode-switching.cc (optimize_mode_switching): Mark the exit - block as nontransparent if it requires a specific mode. - Handle the entry and exit mode as sibling rather than nested - concepts. Remove outdated comment. 
---- - gcc/mode-switching.cc | 34 +++++++++++++++------------------- - 1 file changed, 15 insertions(+), 19 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 4d2b9e284..4761c2ff0 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -649,34 +649,30 @@ optimize_mode_switching (void) - } - if (targetm.mode_switching.entry && targetm.mode_switching.exit) - { -- int mode = targetm.mode_switching.entry (e); -- - info[post_entry->index].mode_out = - info[post_entry->index].mode_in = no_mode; -- if (pre_exit) -- { -- info[pre_exit->index].mode_out = -- info[pre_exit->index].mode_in = no_mode; -- } - -+ int mode = targetm.mode_switching.entry (e); - if (mode != no_mode) - { -- bb = post_entry; -- -- /* By always making this nontransparent, we save -- an extra check in make_preds_opaque. We also -- need this to avoid confusing pre_edge_lcm when -- antic is cleared but transp and comp are set. */ -- bitmap_clear_bit (transp_all, bb->index); -- - /* Insert a fake computing definition of MODE into entry - blocks which compute no mode. This represents the mode on - entry. 
*/ -- info[bb->index].computing = mode; -+ info[post_entry->index].computing = mode; -+ bitmap_clear_bit (transp_all, post_entry->index); -+ } - -- if (pre_exit) -- info[pre_exit->index].seginfo->mode = -- targetm.mode_switching.exit (e); -+ if (pre_exit) -+ { -+ info[pre_exit->index].mode_out = -+ info[pre_exit->index].mode_in = no_mode; -+ -+ int mode = targetm.mode_switching.exit (e); -+ if (mode != no_mode) -+ { -+ info[pre_exit->index].seginfo->mode = mode; -+ bitmap_clear_bit (transp_all, pre_exit->index); -+ } - } - } - --- -2.33.0 - diff --git a/0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch b/0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch deleted file mode 100644 index 8d066b2..0000000 --- a/0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:57 +0000 -Subject: [PATCH 039/157] [Backport][SME] mode-switching: Allow targets to set - the mode for EH handlers - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa - -The mode-switching pass already had hooks to say what mode -an entity is in on entry to a function and what mode it must -be in on return. For SME, we also want to say what mode an -entity is guaranteed to be in on entry to an exception handler. - -gcc/ - * target.def (mode_switching.eh_handler): New hook. - * doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook. - * doc/tm.texi: Regenerate. - * mode-switching.cc (optimize_mode_switching): Use eh_handler - to get the mode on entry to an exception handler. 
---- - gcc/doc/tm.texi | 6 ++++++ - gcc/doc/tm.texi.in | 2 ++ - gcc/mode-switching.cc | 5 ++++- - gcc/target.def | 7 +++++++ - 4 files changed, 19 insertions(+), 1 deletion(-) - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 553aa4cf2..4788b3f7a 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY} - must be defined. - @end deftypefn - -+@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity}) -+If this hook is defined, it should return the mode that @var{entity} is -+guaranteed to be in on entry to an exception handler, or the number of modes -+if there is no such guarantee. -+@end deftypefn -+ - @deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n}) - This hook specifies the order in which modes for @var{entity} - are processed. 0 is the highest priority, -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index 9ec11b15c..ad343504f 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context. 
- - @hook TARGET_MODE_EXIT - -+@hook TARGET_MODE_EH_HANDLER -+ - @hook TARGET_MODE_PRIORITY - - @node Target Attributes -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 4761c2ff0..9a6ba6cca 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -596,7 +596,10 @@ optimize_mode_switching (void) - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos)); - if (ins_pos != BB_END (bb)) - ins_pos = NEXT_INSN (ins_pos); -- ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now); -+ if (bb_has_eh_pred (bb) -+ && targetm.mode_switching.eh_handler) -+ last_mode = targetm.mode_switching.eh_handler (e); -+ ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now); - add_seginfo (&tail_ptr, ptr); - bitmap_clear_bit (transp_all, bb->index); - } -diff --git a/gcc/target.def b/gcc/target.def -index b87b0f927..bbb482de6 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\ - must be defined.", - int, (int entity), NULL) - -+DEFHOOK -+(eh_handler, -+ "If this hook is defined, it should return the mode that @var{entity} is\n\ -+guaranteed to be in on entry to an exception handler, or the number of modes\n\ -+if there is no such guarantee.", -+ int, (int entity), NULL) -+ - DEFHOOK - (priority, - "This hook specifies the order in which modes for @var{entity}\n\ --- -2.33.0 - diff --git a/0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch b/0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch deleted file mode 100644 index 037a4f7..0000000 --- a/0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch +++ /dev/null @@ -1,211 +0,0 @@ -From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:58 +0000 -Subject: [PATCH 040/157] [Backport][SME] mode-switching: Pass set of live - registers to the needed hook - -Reference: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218 - -The emit hook already takes the set of live hard registers as input. -This patch passes it to the needed hook too. SME uses this to -optimise the mode choice based on whether state is live or dead. - -The main caller already had access to the required info, but the -special handling of return values did not. - -gcc/ - * target.def (mode_switching.needed): Add a regs_live parameter. - * doc/tm.texi: Regenerate. - * config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update - accordingly. - * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. - * config/epiphany/mode-switch-use.cc (insert_uses): Likewise. - * config/i386/i386.cc (ix86_mode_needed): Likewise. - * config/riscv/riscv.cc (riscv_mode_needed): Likewise. - * config/sh/sh.cc (sh_mode_needed): Likewise. - * mode-switching.cc (optimize_mode_switching): Likewise. - (create_pre_exit): Likewise, using the DF simulate functions - to calculate the required information. 
---- - gcc/config/epiphany/epiphany-protos.h | 4 +++- - gcc/config/epiphany/epiphany.cc | 2 +- - gcc/config/epiphany/mode-switch-use.cc | 2 +- - gcc/config/i386/i386.cc | 2 +- - gcc/config/sh/sh.cc | 4 ++-- - gcc/doc/tm.texi | 5 +++-- - gcc/mode-switching.cc | 14 ++++++++++++-- - gcc/target.def | 5 +++-- - 8 files changed, 26 insertions(+), 12 deletions(-) - -diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h -index 61b63234e..d463e5483 100644 ---- a/gcc/config/epiphany/epiphany-protos.h -+++ b/gcc/config/epiphany/epiphany-protos.h -@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode, - #endif - extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); - extern void epiphany_expand_set_fp_mode (rtx *operands); --extern int epiphany_mode_needed (int entity, rtx_insn *insn); -+#ifdef HARD_CONST -+extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); -+#endif - extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); - extern bool epiphany_epilogue_uses (int regno); - extern bool epiphany_optimize_mode_switching (int entity); -diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc -index f8c049340..be0fbc68c 100644 ---- a/gcc/config/epiphany/epiphany.cc -+++ b/gcc/config/epiphany/epiphany.cc -@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority) - } - - int --epiphany_mode_needed (int entity, rtx_insn *insn) -+epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) - { - enum attr_fp_mode mode; - -diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc -index 887550a33..cacb1ce5a 100644 ---- a/gcc/config/epiphany/mode-switch-use.cc -+++ b/gcc/config/epiphany/mode-switch-use.cc -@@ -58,7 +58,7 @@ insert_uses (void) - { - if (!INSN_P (insn)) - continue; -- mode = epiphany_mode_needed (e, insn); -+ mode = epiphany_mode_needed (e, insn, {}); - if (mode == 
no_mode) - continue; - if (target_insert_mode_switch_use) -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 60f3296b0..4d591d217 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn) - prior to the execution of insn. */ - - static int --ix86_mode_needed (int entity, rtx_insn *insn) -+ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) - { - switch (entity) - { -diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc -index 03e1c04ec..85e83e12e 100644 ---- a/gcc/config/sh/sh.cc -+++ b/gcc/config/sh/sh.cc -@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *); - static HOST_WIDE_INT rounded_frame_size (int); - static bool sh_frame_pointer_required (void); - static void sh_emit_mode_set (int, int, int, HARD_REG_SET); --static int sh_mode_needed (int, rtx_insn *); -+static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); - static int sh_mode_after (int, int, rtx_insn *); - static int sh_mode_entry (int); - static int sh_mode_exit (int); -@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, - } - - static int --sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) -+sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) - { - return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE; - } -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 4788b3f7a..d8ac6c4d6 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before - sets of a higher numbered entity to a mode of the same or lower priority. - @end deftypefn - --@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}) -+@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) - @var{entity} is an integer specifying a mode-switched entity. 
- If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook - to return the mode that @var{entity} must be switched into prior to the - execution of @var{insn}, or the number of modes if @var{insn} has no --such requirement. -+such requirement. @var{regs_live} contains the set of hard registers -+that are live before @var{insn}. - @end deftypefn - - @deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 9a6ba6cca..6bbda5058 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) - && GET_CODE (PATTERN (last_insn)) == USE - && GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG) - { -+ auto_bitmap live; -+ df_simulate_initialize_backwards (src_bb, live); -+ - int ret_start = REGNO (ret_reg); - int nregs = REG_NREGS (ret_reg); - int ret_end = ret_start + nregs; -@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) - bool forced_late_switch = false; - rtx_insn *before_return_copy; - -+ df_simulate_one_insn_backwards (src_bb, last_insn, live); -+ - do - { - rtx_insn *return_copy = PREV_INSN (last_insn); -@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) - int copy_start, copy_num; - int j; - -+ df_simulate_one_insn_backwards (src_bb, return_copy, live); -+ - if (NONDEBUG_INSN_P (return_copy)) - { - /* When using SJLJ exceptions, the call to the -@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) - the case for floating point on SH4 - then it might - be set by an arithmetic operation that needs a - different mode than the exit block. 
*/ -+ HARD_REG_SET hard_regs_live; -+ REG_SET_TO_HARD_REG_SET (hard_regs_live, live); - for (j = n_entities - 1; j >= 0; j--) - { - int e = entity_map[j]; - int mode = -- targetm.mode_switching.needed (e, return_copy); -+ targetm.mode_switching.needed (e, return_copy, -+ hard_regs_live); - - if (mode != num_modes[e] - && mode != targetm.mode_switching.exit (e)) -@@ -609,7 +619,7 @@ optimize_mode_switching (void) - { - if (INSN_P (insn)) - { -- int mode = targetm.mode_switching.needed (e, insn); -+ int mode = targetm.mode_switching.needed (e, insn, live_now); - rtx link; - - if (mode != no_mode && mode != last_mode) -diff --git a/gcc/target.def b/gcc/target.def -index bbb482de6..06a52bdaf 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -7003,8 +7003,9 @@ DEFHOOK - If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\ - to return the mode that @var{entity} must be switched into prior to the\n\ - execution of @var{insn}, or the number of modes if @var{insn} has no\n\ --such requirement.", -- int, (int entity, rtx_insn *insn), NULL) -+such requirement. 
@var{regs_live} contains the set of hard registers\n\ -+that are live before @var{insn}.", -+ int, (int entity, rtx_insn *insn, HARD_REG_SET regs_live), NULL) - - DEFHOOK - (after, --- -2.33.0 - diff --git a/0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch b/0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch deleted file mode 100644 index da76e1b..0000000 --- a/0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:58 +0000 -Subject: [PATCH 041/157] [Backport][SME] mode-switching: Pass the set of live - registers to the after hook - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d - -This patch passes the set of live hard registers to the after hook, -like the previous one did for the needed hook. - -gcc/ - * target.def (mode_switching.after): Add a regs_live parameter. - * doc/tm.texi: Regenerate. - * config/epiphany/epiphany-protos.h (epiphany_mode_after): Update - accordingly. - * config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise. - (epiphany_mode_after): Likewise. - * config/i386/i386.cc (ix86_mode_after): Likewise. - * config/riscv/riscv.cc (riscv_mode_after): Likewise. - * config/sh/sh.cc (sh_mode_after): Likewise. - * mode-switching.cc (optimize_mode_switching): Likewise. 
---- - gcc/config/epiphany/epiphany-protos.h | 3 ++- - gcc/config/epiphany/epiphany.cc | 5 +++-- - gcc/config/i386/i386.cc | 2 +- - gcc/config/sh/sh.cc | 5 +++-- - gcc/doc/tm.texi | 4 +++- - gcc/mode-switching.cc | 8 ++++---- - gcc/target.def | 4 +++- - 7 files changed, 19 insertions(+), 12 deletions(-) - -diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h -index d463e5483..6326b7e80 100644 ---- a/gcc/config/epiphany/epiphany-protos.h -+++ b/gcc/config/epiphany/epiphany-protos.h -@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int); - extern void epiphany_expand_set_fp_mode (rtx *operands); - #ifdef HARD_CONST - extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET); -+extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, -+ HARD_REG_SET); - #endif --extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn); - extern bool epiphany_epilogue_uses (int regno); - extern bool epiphany_optimize_mode_switching (int entity); - extern bool epiphany_is_interrupt_p (tree); -diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc -index be0fbc68c..62636b1ec 100644 ---- a/gcc/config/epiphany/epiphany.cc -+++ b/gcc/config/epiphany/epiphany.cc -@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET) - return 2; - case EPIPHANY_MSW_ENTITY_ROUND_KNOWN: - if (recog_memoized (insn) == CODE_FOR_set_fp_mode) -- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn); -+ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {}); - /* Fall through. 
*/ - case EPIPHANY_MSW_ENTITY_NEAREST: - case EPIPHANY_MSW_ENTITY_TRUNC: -@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit) - } - - int --epiphany_mode_after (int entity, int last_mode, rtx_insn *insn) -+epiphany_mode_after (int entity, int last_mode, rtx_insn *insn, -+ HARD_REG_SET) - { - /* We have too few call-saved registers to hope to keep the masks across - calls. */ -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 4d591d217..593185fa6 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) - /* Return the mode that an insn results in. */ - - static int --ix86_mode_after (int entity, int mode, rtx_insn *insn) -+ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET) - { - switch (entity) - { -diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc -index 85e83e12e..74d61c43b 100644 ---- a/gcc/config/sh/sh.cc -+++ b/gcc/config/sh/sh.cc -@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int); - static bool sh_frame_pointer_required (void); - static void sh_emit_mode_set (int, int, int, HARD_REG_SET); - static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET); --static int sh_mode_after (int, int, rtx_insn *); -+static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET); - static int sh_mode_entry (int); - static int sh_mode_exit (int); - static int sh_mode_priority (int entity, int n); -@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET) - } - - static int --sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) -+sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn, -+ HARD_REG_SET) - { - if (TARGET_HITACHI && recog_memoized (insn) >= 0 && - get_attr_fp_set (insn) != FP_SET_NONE) -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index d8ac6c4d6..7fce485b2 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10289,12 
+10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers - that are live before @var{insn}. - @end deftypefn - --@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}) -+@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live}) - @var{entity} is an integer specifying a mode-switched entity. - If this hook is defined, it is evaluated for every @var{insn} during mode - switching. It returns the mode that @var{entity} is in after @var{insn} - has been executed. @var{mode} is the mode that @var{entity} was in - before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}. -+@var{regs_live} is the set of hard registers that are live after @var{insn} -+has been executed. - - @var{mode} is equal to the number of modes defined for @var{entity} - if the mode before @var{insn} is unknown. The hook should likewise return -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 6bbda5058..4f0445894 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -631,10 +631,6 @@ optimize_mode_switching (void) - last_mode = mode; - } - -- if (targetm.mode_switching.after) -- last_mode = targetm.mode_switching.after (e, last_mode, -- insn); -- - /* Update LIVE_NOW. 
*/ - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == REG_DEAD) -@@ -644,6 +640,10 @@ optimize_mode_switching (void) - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == REG_UNUSED) - reg_dies (XEXP (link, 0), &live_now); -+ -+ if (targetm.mode_switching.after) -+ last_mode = targetm.mode_switching.after (e, last_mode, -+ insn, live_now); - } - } - -diff --git a/gcc/target.def b/gcc/target.def -index 06a52bdaf..67c20bbb0 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\ - switching. It returns the mode that @var{entity} is in after @var{insn}\n\ - has been executed. @var{mode} is the mode that @var{entity} was in\n\ - before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\ -+@var{regs_live} is the set of hard registers that are live after @var{insn}\n\ -+has been executed.\n\ - \n\ - @var{mode} is equal to the number of modes defined for @var{entity}\n\ - if the mode before @var{insn} is unknown. 
The hook should likewise return\n\ -@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\ - @var{insn}.\n\ - \n\ - Not defining the hook is equivalent to returning @var{mode}.", -- int, (int entity, int mode, rtx_insn *insn), NULL) -+ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL) - - DEFHOOK - (entry, --- -2.33.0 - diff --git a/0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch b/0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch deleted file mode 100644 index 0cb257e..0000000 --- a/0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch +++ /dev/null @@ -1,56 +0,0 @@ -From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:59 +0000 -Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux - fields - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9 - -The pass used the edge aux field to record which mode change -should happen on the edge, with -1 meaning "none". It's more -convenient for later patches to leave aux zero for "none", -and use numbers based at 1 to record a change. - -gcc/ - * mode-switching.cc (commit_mode_sets): Use 1-based edge aux values. 
---- - gcc/mode-switching.cc | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 4f0445894..89a8494c6 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info) - for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--) - { - edge eg = INDEX_EDGE (edge_list, ed); -- int mode; - -- if ((mode = (int)(intptr_t)(eg->aux)) != -1) -+ if (eg->aux) - { -+ int mode = (int) (intptr_t) eg->aux - 1; - HARD_REG_SET live_at_edge; - basic_block src_bb = eg->src; - int cur_mode = info[src_bb->index].mode_out; -@@ -727,14 +727,14 @@ optimize_mode_switching (void) - { - edge eg = INDEX_EDGE (edge_list, ed); - -- eg->aux = (void *)(intptr_t)-1; -+ eg->aux = (void *) (intptr_t) 0; - - for (i = 0; i < no_mode; i++) - { - int m = targetm.mode_switching.priority (entity_map[j], i); - if (mode_bit_p (insert[ed], j, m)) - { -- eg->aux = (void *)(intptr_t)m; -+ eg->aux = (void *) (intptr_t) (m + 1); - break; - } - } --- -2.33.0 - diff --git a/0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch b/0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch deleted file mode 100644 index 9123e40..0000000 --- a/0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch +++ /dev/null @@ -1,337 +0,0 @@ -From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:28:59 +0000 -Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a - target-configurable confluence operator - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27 - -The mode-switching pass assumed that all of an entity's modes -were mutually exclusive. However, the upcoming SME changes -have an entity with some overlapping modes, so that there is -sometimes a "superunion" mode that contains two given modes. 
-We can use this relationship to pass something more helpful than -"don't know" to the emit hook. - -This patch adds a new hook that targets can use to specify -a mode confluence operator. - -With mutually exclusive modes, it's possible to compute a block's -incoming and outgoing modes by looking at its availability sets. -With the confluence operator, we instead need to solve a full -dataflow problem. - -However, when emitting a mode transition, the upcoming SME use of -mode-switching benefits from having as much information as possible -about the starting mode. Calculating this information is definitely -worth the compile time. - -The dataflow problem is written to work before and after the LCM -problem has been solved. A later patch makes use of this. - -While there (since git blame would ping me for the reindented code), -I used a lambda to avoid the cut-&-pasted loops. - -gcc/ - * target.def (mode_switching.confluence): New hook. - * doc/tm.texi (TARGET_MODE_CONFLUENCE): New @hook. - * doc/tm.texi.in: Regenerate. - * mode-switching.cc (confluence_info): New variable. - (mode_confluence, forward_confluence_n, forward_transfer): New - functions. - (optimize_mode_switching): Use them to calculate mode_in when - TARGET_MODE_CONFLUENCE is defined. ---- - gcc/doc/tm.texi | 16 ++++ - gcc/doc/tm.texi.in | 2 + - gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++------- - gcc/target.def | 17 ++++ - 4 files changed, 186 insertions(+), 28 deletions(-) - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 7fce485b2..d7053ec9e 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after - Not defining the hook is equivalent to returning @var{mode}. 
- @end deftypefn - -+@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2}) -+By default, the mode-switching pass assumes that a given entity's modes -+are mutually exclusive. This means that the pass can only tell -+@code{TARGET_MODE_EMIT} about an entity's previous mode if all -+incoming paths of execution leave the entity in the same state. -+ -+However, some entities might have overlapping, non-exclusive modes, -+so that it is sometimes possible to represent ``mode @var{mode1} or mode -+@var{mode2}'' with something more specific than ``mode not known''. -+If this is true for at least one entity, you should define this hook -+and make it return a mode that includes @var{mode1} and @var{mode2} -+as possibilities. (The mode can include other possibilities too.) -+The hook should return the number of modes if no suitable mode exists -+for the given arguments. -+@end deftypefn -+ - @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) - If this hook is defined, it is evaluated for every @var{entity} that - needs mode switching. It should return the mode that @var{entity} is -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index ad343504f..d420e62fd 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context. - - @hook TARGET_MODE_AFTER - -+@hook TARGET_MODE_CONFLUENCE -+ - @hook TARGET_MODE_ENTRY - - @hook TARGET_MODE_EXIT -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 89a8494c6..065767902 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes) - return pre_exit; - } - -+/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY, -+ using NO_MODE to represent an unknown mode if nothing more precise -+ is available. 
*/ -+ -+int -+mode_confluence (int entity, int mode1, int mode2, int no_mode) -+{ -+ if (mode1 == mode2) -+ return mode1; -+ -+ if (mode1 != no_mode -+ && mode2 != no_mode -+ && targetm.mode_switching.confluence) -+ return targetm.mode_switching.confluence (entity, mode1, mode2); -+ -+ return no_mode; -+} -+ -+/* Information for the dataflow problems below. */ -+struct -+{ -+ /* Information about each basic block, indexed by block id. */ -+ struct bb_info *bb_info; -+ -+ /* The entity that we're processing. */ -+ int entity; -+ -+ /* The number of modes defined for the entity, and thus the identifier -+ of the "don't know" mode. */ -+ int no_mode; -+} confluence_info; -+ -+/* Propagate information about any mode change on edge E to the -+ destination block's mode_in. Return true if something changed. -+ -+ The mode_in and mode_out fields use no_mode + 1 to mean "not yet set". */ -+ -+static bool -+forward_confluence_n (edge e) -+{ -+ /* The entry and exit blocks have no useful mode information. */ -+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK) -+ return false; -+ -+ /* We don't control mode changes across abnormal edges. */ -+ if (e->flags & EDGE_ABNORMAL) -+ return false; -+ -+ /* E->aux is nonzero if we have computed the LCM problem and scheduled -+ E to change the mode to E->aux - 1. Otherwise model the change -+ from the source to the destination. 
*/ -+ struct bb_info *bb_info = confluence_info.bb_info; -+ int no_mode = confluence_info.no_mode; -+ int src_mode = bb_info[e->src->index].mode_out; -+ if (e->aux) -+ src_mode = (int) (intptr_t) e->aux - 1; -+ if (src_mode == no_mode + 1) -+ return false; -+ -+ int dest_mode = bb_info[e->dest->index].mode_in; -+ if (dest_mode == no_mode + 1) -+ { -+ bb_info[e->dest->index].mode_in = src_mode; -+ return true; -+ } -+ -+ int entity = confluence_info.entity; -+ int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode); -+ if (dest_mode == new_mode) -+ return false; -+ -+ bb_info[e->dest->index].mode_in = new_mode; -+ return true; -+} -+ -+/* Update block BB_INDEX's mode_out based on its mode_in. Return true if -+ something changed. */ -+ -+static bool -+forward_transfer (int bb_index) -+{ -+ /* The entry and exit blocks have no useful mode information. */ -+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK) -+ return false; -+ -+ /* Only propagate through a block if the entity is transparent. */ -+ struct bb_info *bb_info = confluence_info.bb_info; -+ if (bb_info[bb_index].computing != confluence_info.no_mode -+ || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in) -+ return false; -+ -+ bb_info[bb_index].mode_out = bb_info[bb_index].mode_in; -+ return true; -+} -+ - /* Find all insns that need a particular mode setting, and insert the - necessary mode switches. Return true if we did work. */ - -@@ -567,6 +662,39 @@ optimize_mode_switching (void) - - auto_sbitmap transp_all (last_basic_block_for_fn (cfun)); - -+ auto_bitmap blocks; -+ -+ /* Forward-propagate mode information through blocks where the entity -+ is transparent, so that mode_in describes the mode on entry to each -+ block and mode_out describes the mode on exit from each block. */ -+ auto forwprop_mode_info = [&](struct bb_info *info, -+ int entity, int no_mode) -+ { -+ /* Use no_mode + 1 to mean "not yet set". 
*/ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ if (bb_has_abnormal_pred (bb)) -+ info[bb->index].mode_in = info[bb->index].seginfo->mode; -+ else -+ info[bb->index].mode_in = no_mode + 1; -+ if (info[bb->index].computing != no_mode) -+ info[bb->index].mode_out = info[bb->index].computing; -+ else -+ info[bb->index].mode_out = no_mode + 1; -+ } -+ -+ confluence_info.bb_info = info; -+ confluence_info.entity = entity; -+ confluence_info.no_mode = no_mode; -+ -+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun)); -+ df_simple_dataflow (DF_FORWARD, NULL, NULL, forward_confluence_n, -+ forward_transfer, blocks, -+ df_get_postorder (DF_FORWARD), -+ df_get_n_blocks (DF_FORWARD)); -+ -+ }; -+ - for (j = n_entities - 1; j >= 0; j--) - { - int e = entity_map[j]; -@@ -720,6 +848,7 @@ optimize_mode_switching (void) - for (j = n_entities - 1; j >= 0; j--) - { - int no_mode = num_modes[entity_map[j]]; -+ struct bb_info *info = bb_info[j]; - - /* Insert all mode sets that have been inserted by lcm. */ - -@@ -740,39 +869,33 @@ optimize_mode_switching (void) - } - } - -+ /* mode_in and mode_out can be calculated directly from avin and -+ avout if all the modes are mutually exclusive. Use the target- -+ provided confluence function otherwise. */ -+ if (targetm.mode_switching.confluence) -+ forwprop_mode_info (info, entity_map[j], no_mode); -+ - FOR_EACH_BB_FN (bb, cfun) - { -- struct bb_info *info = bb_info[j]; -- int last_mode = no_mode; -- -- /* intialize mode in availability for bb. 
*/ -- for (i = 0; i < no_mode; i++) -- if (mode_bit_p (avout[bb->index], j, i)) -- { -- if (last_mode == no_mode) -- last_mode = i; -- if (last_mode != i) -+ auto modes_confluence = [&](sbitmap *av) -+ { -+ for (int i = 0; i < no_mode; ++i) -+ if (mode_bit_p (av[bb->index], j, i)) - { -- last_mode = no_mode; -- break; -+ for (int i2 = i + 1; i2 < no_mode; ++i2) -+ if (mode_bit_p (av[bb->index], j, i2)) -+ return no_mode; -+ return i; - } -- } -- info[bb->index].mode_out = last_mode; -+ return no_mode; -+ }; - -- /* intialize mode out availability for bb. */ -- last_mode = no_mode; -- for (i = 0; i < no_mode; i++) -- if (mode_bit_p (avin[bb->index], j, i)) -- { -- if (last_mode == no_mode) -- last_mode = i; -- if (last_mode != i) -- { -- last_mode = no_mode; -- break; -- } -- } -- info[bb->index].mode_in = last_mode; -+ /* intialize mode in/out availability for bb. */ -+ if (!targetm.mode_switching.confluence) -+ { -+ info[bb->index].mode_out = modes_confluence (avout); -+ info[bb->index].mode_in = modes_confluence (avin); -+ } - - for (i = 0; i < no_mode; i++) - if (mode_bit_p (del[bb->index], j, i)) -diff --git a/gcc/target.def b/gcc/target.def -index 67c20bbb0..1e2091ed3 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -7025,6 +7025,23 @@ the number of modes if it does not know what mode @var{entity} has after\n\ - Not defining the hook is equivalent to returning @var{mode}.", - int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL) - -+DEFHOOK -+(confluence, -+ "By default, the mode-switching pass assumes that a given entity's modes\n\ -+are mutually exclusive. 
This means that the pass can only tell\n\ -+@code{TARGET_MODE_EMIT} about an entity's previous mode if all\n\ -+incoming paths of execution leave the entity in the same state.\n\ -+\n\ -+However, some entities might have overlapping, non-exclusive modes,\n\ -+so that it is sometimes possible to represent ``mode @var{mode1} or mode\n\ -+@var{mode2}'' with something more specific than ``mode not known''.\n\ -+If this is true for at least one entity, you should define this hook\n\ -+and make it return a mode that includes @var{mode1} and @var{mode2}\n\ -+as possibilities. (The mode can include other possibilities too.)\n\ -+The hook should return the number of modes if no suitable mode exists\n\ -+for the given arguments.", -+ int, (int entity, int mode1, int mode2), NULL) -+ - DEFHOOK - (entry, - "If this hook is defined, it is evaluated for every @var{entity} that\n\ --- -2.33.0 - diff --git a/0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch b/0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch deleted file mode 100644 index 8fb51ae..0000000 --- a/0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch +++ /dev/null @@ -1,483 +0,0 @@ -From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 11 Nov 2023 17:29:00 +0000 -Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84 - -This patch adds a way for targets to ask that selected mode changes -be brought forward, through a combination of: - -(1) requiring a mode in blocks where the entity was previously - transparent - -(2) pushing the transition at the head of a block onto incomging edges - -SME has two uses for this: - -- A "one-shot" entity that, for any given path of execution, - either stays off or makes exactly one transition from off to on. - This relies only on (1) above; see the hook description for more info. 
- - The main purpose of using mode-switching for this entity is to - shrink-wrap the code that requires it. - -- A second entity for which all transitions must be from known - modes, which is enforced using a combination of (1) and (2). - More specifically, (1) looks for edges B1->B2 for which: - - - B2 requires a specific mode and - - B1 does not guarantee a specific starting mode - - In this system, such an edge is only possible if the entity is - transparent in B1. (1) then forces B1 to require some safe common - mode. Applying this inductively means that all incoming edges are - from known modes. If different edges give different starting modes, - (2) pushes the transitions onto the edges themselves; this only - happens if the entity is not transparent in some predecessor block. - -The patch also uses the back-propagation as an excuse to do a simple -on-the-fly optimisation. - -Hopefully the comments in the patch explain things a bit better. - -gcc/ - * target.def (mode_switching.backprop): New hook. - * doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook. - * doc/tm.texi: Regenerate. - * mode-switching.cc (struct bb_info): Add single_succ. - (confluence_info): Add transp field. - (single_succ_confluence_n, single_succ_transfer): New functions. - (backprop_confluence_n, backprop_transfer): Likewise. - (optimize_mode_switching): Use them. Push mode transitions onto - a block's incoming edges, if the backprop hook requires it. ---- - gcc/doc/tm.texi | 28 +++++ - gcc/doc/tm.texi.in | 2 + - gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++ - gcc/target.def | 29 +++++ - 4 files changed, 334 insertions(+) - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index d7053ec9e..5f0972356 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists - for the given arguments. 
- @end deftypefn - -+@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2}) -+If defined, the mode-switching pass uses this hook to back-propagate mode -+requirements through blocks that have no mode requirements of their own. -+Specifically, @var{mode1} is the mode that @var{entity} has on exit -+from a block B1 (say) and @var{mode2} is the mode that the next block -+requires @var{entity} to have. B1 does not have any mode requirements -+of its own. -+ -+The hook should return the mode that it prefers or requires @var{entity} -+to have in B1, or the number of modes if there is no such requirement. -+If the hook returns a required mode for more than one of B1's outgoing -+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}. -+ -+For example, suppose there is a ``one-shot'' entity that, -+for a given execution of a function, either stays off or makes exactly -+one transition from off to on. It is safe to make the transition at any -+time, but it is better not to do so unnecessarily. This hook allows the -+function to manage such an entity without having to track its state at -+runtime. Specifically. the entity would have two modes, 0 for off and -+1 for on, with 2 representing ``don't know''. The system is forbidden from -+transitioning from 2 to 1, since 2 represents the possibility that the -+entity is already on (and the aim is to avoid having to emit code to -+check for that case). This hook would therefore return 1 when @var{mode1} -+is 2 and @var{mode2} is 1, which would force the entity to be on in the -+source block. Applying this inductively would remove all transitions -+in which the previous state is unknown. -+@end deftypefn -+ - @deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity}) - If this hook is defined, it is evaluated for every @var{entity} that - needs mode switching. 
It should return the mode that @var{entity} is -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index d420e62fd..fcab21744 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context. - - @hook TARGET_MODE_CONFLUENCE - -+@hook TARGET_MODE_BACKPROP -+ - @hook TARGET_MODE_ENTRY - - @hook TARGET_MODE_EXIT -diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc -index 065767902..c2a0f0294 100644 ---- a/gcc/mode-switching.cc -+++ b/gcc/mode-switching.cc -@@ -81,6 +81,7 @@ struct bb_info - int computing; - int mode_out; - int mode_in; -+ int single_succ; - }; - - /* Clear ode I from entity J in bitmap B. */ -@@ -508,6 +509,9 @@ struct - /* Information about each basic block, indexed by block id. */ - struct bb_info *bb_info; - -+ /* A bitmap of blocks for which the current entity is transparent. */ -+ sbitmap transp; -+ - /* The entity that we're processing. */ - int entity; - -@@ -579,6 +583,210 @@ forward_transfer (int bb_index) - return true; - } - -+/* A backwards confluence function. Update the the bb_info single_succ -+ field for E's source block, based on changes to E's destination block. -+ At the end of the dataflow problem, single_succ is the single mode -+ that all successors require (directly or indirectly), or no_mode -+ if there are conflicting requirements. -+ -+ Initially, a value of no_mode + 1 means "don't know". */ -+ -+static bool -+single_succ_confluence_n (edge e) -+{ -+ /* The entry block has no associated mode information. */ -+ if (e->src->index == ENTRY_BLOCK) -+ return false; -+ -+ /* We don't control mode changes across abnormal edges. */ -+ if (e->flags & EDGE_ABNORMAL) -+ return false; -+ -+ /* Do nothing if we've already found a conflict. 
*/ -+ struct bb_info *bb_info = confluence_info.bb_info; -+ int no_mode = confluence_info.no_mode; -+ int src_mode = bb_info[e->src->index].single_succ; -+ if (src_mode == no_mode) -+ return false; -+ -+ /* Work out what mode the destination block (or its successors) require. */ -+ int dest_mode; -+ if (e->dest->index == EXIT_BLOCK) -+ dest_mode = no_mode; -+ else if (bitmap_bit_p (confluence_info.transp, e->dest->index)) -+ dest_mode = bb_info[e->dest->index].single_succ; -+ else -+ dest_mode = bb_info[e->dest->index].seginfo->mode; -+ -+ /* Do nothing if the destination block has no new information. */ -+ if (dest_mode == no_mode + 1 || dest_mode == src_mode) -+ return false; -+ -+ /* Detect conflicting modes. */ -+ if (src_mode != no_mode + 1) -+ dest_mode = no_mode; -+ -+ bb_info[e->src->index].single_succ = dest_mode; -+ return true; -+} -+ -+/* A backward transfer function for computing the bb_info single_succ -+ fields, as described above single_succ_confluence. */ -+ -+static bool -+single_succ_transfer (int bb_index) -+{ -+ /* We don't have any field to transfer to. Assume that, after the -+ first iteration, we are only called if single_succ has changed. -+ We should then process incoming edges if the entity is transparent. */ -+ return bitmap_bit_p (confluence_info.transp, bb_index); -+} -+ -+/* Check whether the target wants to back-propagate a mode change across -+ edge E, and update the source block's computed mode if so. Return true -+ if something changed. */ -+ -+static bool -+backprop_confluence_n (edge e) -+{ -+ /* The entry and exit blocks have no useful mode information. */ -+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK) -+ return false; -+ -+ /* We don't control mode changes across abnormal edges. */ -+ if (e->flags & EDGE_ABNORMAL) -+ return false; -+ -+ /* We can only require a new mode in the source block if the entity -+ was originally transparent there. 
*/ -+ if (!bitmap_bit_p (confluence_info.transp, e->src->index)) -+ return false; -+ -+ /* Exit now if there is no required mode, or if all paths into the -+ source block leave the entity in the required mode. */ -+ struct bb_info *bb_info = confluence_info.bb_info; -+ int no_mode = confluence_info.no_mode; -+ int src_mode = bb_info[e->src->index].mode_out; -+ int dest_mode = bb_info[e->dest->index].mode_in; -+ if (dest_mode == no_mode || src_mode == dest_mode) -+ return false; -+ -+ /* See what the target thinks about this transition. */ -+ int entity = confluence_info.entity; -+ int new_mode = targetm.mode_switching.backprop (entity, src_mode, -+ dest_mode); -+ if (new_mode == no_mode) -+ return false; -+ -+ /* The target doesn't like the current transition, but would be happy -+ with a transition from NEW_MODE. -+ -+ If we force the source block to use NEW_MODE, we might introduce a -+ double transition on at least one path through the function (one to -+ NEW_MODE and then one to DEST_MODE). Therefore, if all destination -+ blocks require the same mode, it is usually better to bring that -+ mode requirement forward. -+ -+ If that isn't possible, merge the preference for this edge with -+ the preferences for other edges. no_mode + 1 indicates that there -+ was no previous preference. */ -+ int old_mode = bb_info[e->src->index].computing; -+ if (bb_info[e->src->index].single_succ != no_mode) -+ new_mode = bb_info[e->src->index].single_succ; -+ else if (old_mode != no_mode + 1) -+ new_mode = mode_confluence (entity, old_mode, new_mode, no_mode); -+ -+ if (old_mode == new_mode) -+ return false; -+ -+ bb_info[e->src->index].computing = new_mode; -+ return true; -+} -+ -+/* If the current entity was originally transparent in block BB_INDEX, -+ update the incoming mode to match the outgoing mode. Register a mode -+ change if the entity is no longer transparent. 
-+ -+ Also, as an on-the-fly optimization, check whether the entity was -+ originally transparent in BB_INDEX and if all successor blocks require -+ the same mode. If so, anticipate the mode change in BB_INDEX if -+ doing it on the incoming edges would require no more mode changes than -+ doing it on the outgoing edges. The aim is to reduce the total number -+ of mode changes emitted for the function (and thus reduce code size and -+ cfg complexity) without increasing the number of mode changes on any -+ given path through the function. A typical case where it helps is: -+ -+ T -+ / \ -+ T M -+ \ / -+ M -+ -+ where the entity is transparent in the T blocks and is required to have -+ mode M in the M blocks. If there are no redundancies leading up to this, -+ there will be two mutually-exclusive changes to mode M, one on each of -+ the T->M edges. The optimization instead converts it to: -+ -+ T T M -+ / \ / \ / \ -+ T M -> M M -> M M -+ \ / \ / \ / -+ M M M -+ -+ which creates a single transition to M for both paths through the diamond. -+ -+ Return true if something changed. */ -+ -+static bool -+backprop_transfer (int bb_index) -+{ -+ /* The entry and exit blocks have no useful mode information. */ -+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK) -+ return false; -+ -+ /* We can only require a new mode if the entity was previously -+ transparent. */ -+ if (!bitmap_bit_p (confluence_info.transp, bb_index)) -+ return false; -+ -+ struct bb_info *bb_info = confluence_info.bb_info; -+ basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index); -+ int no_mode = confluence_info.no_mode; -+ int mode_in = bb_info[bb_index].mode_in; -+ int mode_out = bb_info[bb_index].computing; -+ if (mode_out == no_mode + 1) -+ { -+ /* The entity is still transparent for this block. See whether -+ all successor blocks need the same mode, either directly or -+ indirectly. 
*/ -+ mode_out = bb_info[bb_index].single_succ; -+ if (mode_out == no_mode) -+ return false; -+ -+ /* Get a minimum bound on the number of transitions that would be -+ removed if BB itself required MODE_OUT. */ -+ unsigned int moved = 0; -+ for (edge e : bb->succs) -+ if (e->dest->index != EXIT_BLOCK -+ && mode_out == bb_info[e->dest->index].seginfo->mode) -+ moved += 1; -+ -+ /* See whether making the mode change on all incoming edges would -+ be no worse than making it on MOVED outgoing edges. */ -+ if (moved < EDGE_COUNT (bb->preds)) -+ return false; -+ -+ bb_info[bb_index].mode_out = mode_out; -+ bb_info[bb_index].computing = mode_out; -+ } -+ else if (mode_out == mode_in) -+ return false; -+ -+ bb_info[bb_index].mode_in = mode_out; -+ bb_info[bb_index].seginfo->mode = mode_out; -+ return true; -+} -+ - /* Find all insns that need a particular mode setting, and insert the - necessary mode switches. Return true if we did work. */ - -@@ -684,6 +892,7 @@ optimize_mode_switching (void) - } - - confluence_info.bb_info = info; -+ confluence_info.transp = nullptr; - confluence_info.entity = entity; - confluence_info.no_mode = no_mode; - -@@ -695,6 +904,9 @@ optimize_mode_switching (void) - - }; - -+ if (targetm.mode_switching.backprop) -+ clear_aux_for_edges (); -+ - for (j = n_entities - 1; j >= 0; j--) - { - int e = entity_map[j]; -@@ -817,6 +1029,53 @@ optimize_mode_switching (void) - } - } - -+ /* If the target requests it, back-propagate selected mode requirements -+ through transparent blocks. */ -+ if (targetm.mode_switching.backprop) -+ { -+ /* First work out the mode on entry to and exit from each block. */ -+ forwprop_mode_info (info, e, no_mode); -+ -+ /* Compute the single_succ fields, as described above -+ single_succ_confluence. 
*/ -+ FOR_EACH_BB_FN (bb, cfun) -+ info[bb->index].single_succ = no_mode + 1; -+ -+ confluence_info.transp = transp_all; -+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun)); -+ df_simple_dataflow (DF_BACKWARD, NULL, NULL, -+ single_succ_confluence_n, -+ single_succ_transfer, blocks, -+ df_get_postorder (DF_BACKWARD), -+ df_get_n_blocks (DF_BACKWARD)); -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ /* Repurpose mode_in as the first mode required by the block, -+ or the output mode if none. */ -+ if (info[bb->index].seginfo->mode != no_mode) -+ info[bb->index].mode_in = info[bb->index].seginfo->mode; -+ -+ /* In transparent blocks, use computing == no_mode + 1 -+ to indicate that no propagation has taken place. */ -+ if (info[bb->index].computing == no_mode) -+ info[bb->index].computing = no_mode + 1; -+ } -+ -+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun)); -+ df_simple_dataflow (DF_BACKWARD, NULL, NULL, backprop_confluence_n, -+ backprop_transfer, blocks, -+ df_get_postorder (DF_BACKWARD), -+ df_get_n_blocks (DF_BACKWARD)); -+ -+ /* Any block that now computes a mode is no longer transparent. */ -+ FOR_EACH_BB_FN (bb, cfun) -+ if (info[bb->index].computing == no_mode + 1) -+ info[bb->index].computing = no_mode; -+ else if (info[bb->index].computing != no_mode) -+ bitmap_clear_bit (transp_all, bb->index); -+ } -+ - /* Set the anticipatable and computing arrays. */ - for (i = 0; i < no_mode; i++) - { -@@ -900,6 +1159,22 @@ optimize_mode_switching (void) - for (i = 0; i < no_mode; i++) - if (mode_bit_p (del[bb->index], j, i)) - info[bb->index].seginfo->mode = no_mode; -+ -+ /* See whether the target can perform the first transition. -+ If not, push it onto the incoming edges. The earlier backprop -+ pass should ensure that the resulting transitions are valid. 
*/ -+ if (targetm.mode_switching.backprop) -+ { -+ int from_mode = info[bb->index].mode_in; -+ int to_mode = info[bb->index].seginfo->mode; -+ if (targetm.mode_switching.backprop (entity_map[j], from_mode, -+ to_mode) != no_mode) -+ { -+ for (edge e : bb->preds) -+ e->aux = (void *) (intptr_t) (to_mode + 1); -+ info[bb->index].mode_in = to_mode; -+ } -+ } - } - - /* Now output the remaining mode sets in all the segments. */ -diff --git a/gcc/target.def b/gcc/target.def -index 1e2091ed3..4d77c1523 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -7042,6 +7042,35 @@ The hook should return the number of modes if no suitable mode exists\n\ - for the given arguments.", - int, (int entity, int mode1, int mode2), NULL) - -+DEFHOOK -+(backprop, -+ "If defined, the mode-switching pass uses this hook to back-propagate mode\n\ -+requirements through blocks that have no mode requirements of their own.\n\ -+Specifically, @var{mode1} is the mode that @var{entity} has on exit\n\ -+from a block B1 (say) and @var{mode2} is the mode that the next block\n\ -+requires @var{entity} to have. B1 does not have any mode requirements\n\ -+of its own.\n\ -+\n\ -+The hook should return the mode that it prefers or requires @var{entity}\n\ -+to have in B1, or the number of modes if there is no such requirement.\n\ -+If the hook returns a required mode for more than one of B1's outgoing\n\ -+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.\n\ -+\n\ -+For example, suppose there is a ``one-shot'' entity that,\n\ -+for a given execution of a function, either stays off or makes exactly\n\ -+one transition from off to on. It is safe to make the transition at any\n\ -+time, but it is better not to do so unnecessarily. This hook allows the\n\ -+function to manage such an entity without having to track its state at\n\ -+runtime. Specifically. the entity would have two modes, 0 for off and\n\ -+1 for on, with 2 representing ``don't know''. 
The system is forbidden from\n\ -+transitioning from 2 to 1, since 2 represents the possibility that the\n\ -+entity is already on (and the aim is to avoid having to emit code to\n\ -+check for that case). This hook would therefore return 1 when @var{mode1}\n\ -+is 2 and @var{mode2} is 1, which would force the entity to be on in the\n\ -+source block. Applying this inductively would remove all transitions\n\ -+in which the previous state is unknown.", -+ int, (int entity, int mode1, int mode2), NULL) -+ - DEFHOOK - (entry, - "If this hook is defined, it is evaluated for every @var{entity} that\n\ --- -2.33.0 - diff --git a/0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch b/0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch deleted file mode 100644 index ad6da47..0000000 --- a/0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Fri, 1 Dec 2023 08:36:15 +0000 -Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper - function - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45 - -SME will add more intrinsics whose expansion code requires -the mode of the function return value. This patch adds an -associated helper routine. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (function_expander::result_mode): New member function. - * config/aarch64/aarch64-sve-builtins-base.cc - (svld234_impl::expand): Use it. - * config/aarch64/aarch64-sve-builtins.cc - (function_expander::get_reg_target): Likewise. 
---- - gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +- - gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- - gcc/config/aarch64/aarch64-sve-builtins.h | 9 +++++++++ - 3 files changed, 11 insertions(+), 2 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -index 56c9d75e7..c9bf13792 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -@@ -1272,7 +1272,7 @@ public: - rtx - expand (function_expander &e) const OVERRIDE - { -- machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); -+ machine_mode tuple_mode = e.result_mode (); - insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, - tuple_mode, e.vector_mode (0)); - return e.use_contiguous_load_insn (icode); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index e168c8334..91af96687 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops, - rtx - function_expander::get_reg_target () - { -- machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); -+ machine_mode target_mode = result_mode (); - if (!possible_target || GET_MODE (possible_target) != target_mode) - possible_target = gen_reg_rtx (target_mode); - return possible_target; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 0d130b871..52994cde0 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -528,6 +528,8 @@ public: - insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, - machine_mode = E_VOIDmode); - -+ machine_mode result_mode () const; -+ - bool overlaps_input_p (rtx); - - rtx convert_to_pmode (rtx); -@@ -877,6 +879,13 @@ 
function_base::call_properties (const function_instance &instance) const - return flags; - } - -+/* Return the mode of the result of a call. */ -+inline machine_mode -+function_expander::result_mode () const -+{ -+ return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); -+} -+ - } - - #endif --- -2.33.0 - diff --git a/0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch b/0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch deleted file mode 100644 index cdaf6ee..0000000 --- a/0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch +++ /dev/null @@ -1,232 +0,0 @@ -From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001 -From: Kewen Lin -Date: Tue, 15 Nov 2022 20:26:07 -0600 -Subject: [PATCH 046/157] [Backport][SME] rtl: Try to remove EH edges after - {pro,epi}logue generation [PR90259] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858 - -After prologue and epilogue generation, the judgement on whether -one memory access onto stack frame may trap or not could change, -since we get more exact stack information by now. - -As PR90259 shows, some memory access becomes impossible to trap -any more after prologue and epilogue generation, it can make -subsequent optimization be able to remove it if safe, but it -results in unexpected control flow status due to REG_EH_REGION -note missing. - -This patch proposes to try to remove EH edges with function -purge_all_dead_edges after prologue and epilogue generation, -it simplifies CFG as early as we can and don't need any fixup -in downstream passes. 
- -CFG simplification result with PR90259's case as example: - -*before* - - 18: %1:TF=call [`__gcc_qdiv'] argc:0 - REG_EH_REGION 0x2 - 77: NOTE_INSN_BASIC_BLOCK 3 - 19: NOTE_INSN_DELETED - 20: NOTE_INSN_DELETED - 110: [%31:SI+0x20]=%1:DF - REG_EH_REGION 0x2 - 116: NOTE_INSN_BASIC_BLOCK 4 - 111: [%31:SI+0x28]=%2:DF - REG_EH_REGION 0x2 - 22: NOTE_INSN_BASIC_BLOCK 5 - 108: %0:DF=[%31:SI+0x20] - REG_EH_REGION 0x2 - 117: NOTE_INSN_BASIC_BLOCK 6 - 109: %1:DF=[%31:SI+0x28] - REG_EH_REGION 0x2 - 79: NOTE_INSN_BASIC_BLOCK 7 - 26: [%31:SI+0x18]=%0:DF - 104: pc=L69 - 105: barrier - -*after* - - 18: %1:TF=call [`__gcc_qdiv'] argc:0 - REG_EH_REGION 0x2 - 77: NOTE_INSN_BASIC_BLOCK 3 - 19: NOTE_INSN_DELETED - 20: NOTE_INSN_DELETED - 110: [%31:SI+0x20]=%1:DF - 111: [%31:SI+0x28]=%2:DF - 108: %0:DF=[%31:SI+0x20] - 109: %1:DF=[%31:SI+0x28] - 26: [%31:SI+0x18]=%0:DF - 104: pc=L69 - 105: barrier - - PR rtl-optimization/90259 - -gcc/ChangeLog: - - * function.cc (rest_of_handle_thread_prologue_and_epilogue): Add - parameter fun, and call function purge_all_dead_edges. - (pass_thread_prologue_and_epilogue::execute): Name unamed parameter - as fun, and use it for rest_of_handle_thread_prologue_and_epilogue. - -gcc/testsuite/ChangeLog: - - * g++.target/powerpc/pr90259.C: New. ---- - gcc/function.cc | 13 ++- - gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++ - 2 files changed, 113 insertions(+), 3 deletions(-) - create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C - -diff --git a/gcc/function.cc b/gcc/function.cc -index 49c7ccf4b..28de39dd6 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt) - } - - static unsigned int --rest_of_handle_thread_prologue_and_epilogue (void) -+rest_of_handle_thread_prologue_and_epilogue (function *fun) - { - /* prepare_shrink_wrap is sensitive to the block structure of the control - flow graph, so clean it up first. 
*/ -@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void) - Fix that up. */ - fixup_partitions (); - -+ /* After prologue and epilogue generation, the judgement on whether -+ one memory access onto stack frame may trap or not could change, -+ since we get more exact stack information by now. So try to -+ remove any EH edges here, see PR90259. */ -+ if (fun->can_throw_non_call_exceptions) -+ purge_all_dead_edges (); -+ - /* Shrink-wrapping can result in unreachable edges in the epilogue, - see PR57320. */ - cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0); -@@ -6614,9 +6621,9 @@ public: - {} - - /* opt_pass methods: */ -- virtual unsigned int execute (function *) -+ unsigned int execute (function * fun) final override - { -- return rest_of_handle_thread_prologue_and_epilogue (); -+ return rest_of_handle_thread_prologue_and_epilogue (fun); - } - - }; // class pass_thread_prologue_and_epilogue -diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C -new file mode 100644 -index 000000000..db75ac7fe ---- /dev/null -+++ b/gcc/testsuite/g++.target/powerpc/pr90259.C -@@ -0,0 +1,103 @@ -+/* { dg-require-effective-target long_double_ibm128 } */ -+/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */ -+/* { dg-add-options long_double_ibm128 } */ -+ -+/* Verify there is no ICE. 
*/ -+ -+template struct b -+{ -+ static constexpr int c = a; -+}; -+template using d = b; -+struct e -+{ -+ int f; -+ int -+ g () -+ { -+ return __builtin_ceil (f / (long double) h); -+ } -+ float h; -+}; -+template using k = d; -+template class n -+{ -+public: -+ e ae; -+ void af (); -+}; -+template -+void -+n::af () -+{ -+ ae.g (); -+} -+template using m = int; -+template ::c>> -+using aj = n; -+struct o -+{ -+ void -+ af () -+ { -+ al.af (); -+ } -+ aj al; -+}; -+template class am; -+template class ao -+{ -+protected: -+ static i *ap (int); -+}; -+template class p; -+template class p : ao -+{ -+public: -+ static ar -+ as (const int &p1, j...) -+ { -+ (*ao::ap (p1)) (j ()...); -+ } -+}; -+template class am -+{ -+ template using av = int; -+ -+public: -+ template , void>, -+ typename = av> -+ am (i); -+ using aw = ar (*) (const int &, j...); -+ aw ax; -+}; -+template -+template -+am::am (i) -+{ -+ ax = p::as; -+} -+struct G -+{ -+ void ba (am); -+}; -+struct q -+{ -+ q () -+ { -+ G a; -+ a.ba (r ()); -+ } -+ struct r -+ { -+ void -+ operator() (o p1) -+ try -+ { -+ p1.af (); -+ } -+ catch (int) -+ { -+ } -+ }; -+} s; --- -2.33.0 - diff --git a/0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch b/0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch deleted file mode 100644 index 4d3168b..0000000 --- a/0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch +++ /dev/null @@ -1,71 +0,0 @@ -From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001 -From: Andrew Pinski -Date: Thu, 17 Nov 2022 22:03:08 +0000 -Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after - reclaration error - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e - -The problem here is after we created a call expression -in the C front-end, we replace the decl type with -an error mark node. 
We then end up calling -aggregate_value_p with the call expression -with the decl with the error mark as the type -and we ICE. - -The fix is to check the function type -after we process the call expression inside -aggregate_value_p to get it. - -OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. - -Thanks, -Andrew Pinski - -gcc/ChangeLog: - - PR middle-end/107705 - * function.cc (aggregate_value_p): Return 0 if - the function type was an error operand. - -gcc/testsuite/ChangeLog: - - * gcc.dg/redecl-22.c: New test. ---- - gcc/function.cc | 3 +++ - gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++ - 2 files changed, 12 insertions(+) - create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c - -diff --git a/gcc/function.cc b/gcc/function.cc -index 28de39dd6..99aa738eb 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype) - if (VOID_TYPE_P (type)) - return 0; - -+ if (error_operand_p (fntype)) -+ return 0; -+ - /* If a record should be passed the same as its first (and only) member - don't pass it as an aggregate. 
*/ - if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type)) -diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c -new file mode 100644 -index 000000000..7758570fa ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/redecl-22.c -@@ -0,0 +1,9 @@ -+/* We used to ICE in the gimplifier, PR 107705 */ -+/* { dg-do compile } */ -+/* { dg-options "-w" } */ -+int f (void) -+{ -+ int (*p) (void) = 0; // { dg-note "" } -+ return p (); -+ int p = 1; // { dg-error "" } -+} --- -2.33.0 - diff --git a/0147-Backport-SME-function-Change-return-type-of-predicat.patch b/0147-Backport-SME-function-Change-return-type-of-predicat.patch deleted file mode 100644 index 809a73b..0000000 --- a/0147-Backport-SME-function-Change-return-type-of-predicat.patch +++ /dev/null @@ -1,351 +0,0 @@ -From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001 -From: Uros Bizjak -Date: Wed, 21 Jun 2023 21:55:30 +0200 -Subject: [PATCH 048/157] [Backport][SME] function: Change return type of - predicate function from int to bool - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086 - -Also change some internal variables to bool and some functions to void. - -gcc/ChangeLog: - - * function.h (emit_initial_value_sets): - Change return type from int to void. - (aggregate_value_p): Change return type from int to bool. - (prologue_contains): Ditto. - (epilogue_contains): Ditto. - (prologue_epilogue_contains): Ditto. - * function.cc (temp_slot): Make "in_use" variable bool. - (make_slot_available): Update for changed "in_use" variable. - (assign_stack_temp_for_type): Ditto. - (emit_initial_value_sets): Change return type from int to void - and update function body accordingly. - (instantiate_virtual_regs): Ditto. - (rest_of_handle_thread_prologue_and_epilogue): Ditto. - (safe_insn_predicate): Change return type from int to bool. 
- (aggregate_value_p): Change return type from int to bool - and update function body accordingly. - (prologue_contains): Change return type from int to bool. - (prologue_epilogue_contains): Ditto. ---- - gcc/function.cc | 77 ++++++++++++++++++++++++------------------------- - gcc/function.h | 10 +++---- - 2 files changed, 42 insertions(+), 45 deletions(-) - -diff --git a/gcc/function.cc b/gcc/function.cc -index 99aa738eb..fc8eb5812 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -578,8 +578,8 @@ public: - tree type; - /* The alignment (in bits) of the slot. */ - unsigned int align; -- /* Nonzero if this temporary is currently in use. */ -- char in_use; -+ /* True if this temporary is currently in use. */ -+ bool in_use; - /* Nesting level at which this slot is being used. */ - int level; - /* The offset of the slot from the frame_pointer, including extra space -@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp) - { - cut_slot_from_list (temp, temp_slots_at_level (temp->level)); - insert_slot_to_list (temp, &avail_temp_slots); -- temp->in_use = 0; -+ temp->in_use = false; - temp->level = -1; - n_temp_slots_in_use--; - } -@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) - if (known_ge (best_p->size - rounded_size, alignment)) - { - p = ggc_alloc (); -- p->in_use = 0; -+ p->in_use = false; - p->size = best_p->size - rounded_size; - p->base_offset = best_p->base_offset + rounded_size; - p->full_size = best_p->full_size - rounded_size; -@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type) - } - - p = selected; -- p->in_use = 1; -+ p->in_use = true; - p->type = type; - p->level = temp_slot_level; - n_temp_slots_in_use++; -@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno) - return NULL_RTX; - } - --unsigned int -+void - emit_initial_value_sets (void) - { - struct initial_value_struct *ivs = crtl->hard_reg_initial_vals; -@@ 
-1348,7 +1348,7 @@ emit_initial_value_sets (void) - rtx_insn *seq; - - if (ivs == 0) -- return 0; -+ return; - - start_sequence (); - for (i = 0; i < ivs->num_entries; i++) -@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void) - end_sequence (); - - emit_insn_at_entry (seq); -- return 0; - } - - /* Return the hardreg-pseudoreg initial values pair entry I and -@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc) - /* A subroutine of instantiate_virtual_regs_in_insn. Return true if X - matches the predicate for insn CODE operand OPERAND. */ - --static int -+static bool - safe_insn_predicate (int code, int operand, rtx x) - { - return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x); -@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl) - /* Pass through the INSNS of function FNDECL and convert virtual register - references to hard register references. */ - --static unsigned int -+static void - instantiate_virtual_regs (void) - { - rtx_insn *insn; -@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void) - /* Indicate that, from now on, assign_stack_local should use - frame_pointer_rtx. */ - virtuals_instantiated = 1; -- -- return 0; - } - - namespace { -@@ -2031,7 +2028,8 @@ public: - /* opt_pass methods: */ - virtual unsigned int execute (function *) - { -- return instantiate_virtual_regs (); -+ instantiate_virtual_regs (); -+ return 0; - } - - }; // class pass_instantiate_virtual_regs -@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt) - } - - --/* Return 1 if EXP is an aggregate type (or a value with aggregate type). -+/* Return true if EXP is an aggregate type (or a value with aggregate type). - This means a type for which function calls must pass an address to the - function or get an address back from the function. - EXP may be a type node or an expression (whose type is tested). */ - --int -+bool - aggregate_value_p (const_tree exp, const_tree fntype) - { - const_tree type = (TYPE_P (exp)) ? 
exp : TREE_TYPE (exp); -@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) - else - /* For internal functions, assume nothing needs to be - returned in memory. */ -- return 0; -+ return false; - } - break; - case FUNCTION_DECL: -@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype) - } - - if (VOID_TYPE_P (type)) -- return 0; -+ return false; - - if (error_operand_p (fntype)) -- return 0; -+ return false; - - /* If a record should be passed the same as its first (and only) member - don't pass it as an aggregate. */ -@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype) - reference, do so. */ - if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL) - && DECL_BY_REFERENCE (exp)) -- return 1; -+ return true; - - /* Function types that are TREE_ADDRESSABLE force return in memory. */ - if (fntype && TREE_ADDRESSABLE (fntype)) -- return 1; -+ return true; - - /* Types that are TREE_ADDRESSABLE must be constructed in memory, - and thus can't be returned in registers. */ - if (TREE_ADDRESSABLE (type)) -- return 1; -+ return true; - - if (TYPE_EMPTY_P (type)) -- return 0; -+ return false; - - if (flag_pcc_struct_return && AGGREGATE_TYPE_P (type)) -- return 1; -+ return true; - - if (targetm.calls.return_in_memory (type, fntype)) -- return 1; -+ return true; - - /* Make sure we have suitable call-clobbered regs to return - the value in; if not, we must return it in memory. */ -@@ -2129,7 +2127,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) - /* If we have something other than a REG (e.g. a PARALLEL), then assume - it is OK. */ - if (!REG_P (reg)) -- return 0; -+ return false; - - /* Use the default ABI if the type of the function isn't known. 
- The scheme for handling interoperability between different ABIs -@@ -2142,9 +2140,9 @@ aggregate_value_p (const_tree exp, const_tree fntype) - nregs = hard_regno_nregs (regno, TYPE_MODE (type)); - for (i = 0; i < nregs; i++) - if (!fixed_regs[regno + i] && !abi.clobbers_full_reg_p (regno + i)) -- return 1; -+ return true; - -- return 0; -+ return false; - } - - /* Return true if we should assign DECL a pseudo register; false if it -@@ -5741,26 +5739,26 @@ contains (const rtx_insn *insn, hash_table *hash) - return hash->find (const_cast (insn)) != NULL; - } - --int -+bool - prologue_contains (const rtx_insn *insn) - { - return contains (insn, prologue_insn_hash); - } - --int -+bool - epilogue_contains (const rtx_insn *insn) - { - return contains (insn, epilogue_insn_hash); - } - --int -+bool - prologue_epilogue_contains (const rtx_insn *insn) - { - if (contains (insn, prologue_insn_hash)) -- return 1; -+ return true; - if (contains (insn, epilogue_insn_hash)) -- return 1; -- return 0; -+ return true; -+ return false; - } - - void -@@ -6386,14 +6384,13 @@ current_function_name (void) - } - - --static unsigned int -+static void - rest_of_handle_check_leaf_regs (void) - { - #ifdef LEAF_REGISTERS - crtl->uses_only_leaf_regs - = optimize > 0 && only_leaf_regs_used () && leaf_function_p (); - #endif -- return 0; - } - - /* Insert a TYPE into the used types hash table of CFUN. 
*/ -@@ -6518,7 +6515,8 @@ public: - /* opt_pass methods: */ - virtual unsigned int execute (function *) - { -- return rest_of_handle_check_leaf_regs (); -+ rest_of_handle_check_leaf_regs (); -+ return 0; - } - - }; // class pass_leaf_regs -@@ -6531,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt) - return new pass_leaf_regs (ctxt); - } - --static unsigned int -+static void - rest_of_handle_thread_prologue_and_epilogue (function *fun) - { - /* prepare_shrink_wrap is sensitive to the block structure of the control -@@ -6563,8 +6561,6 @@ rest_of_handle_thread_prologue_and_epilogue (function *fun) - /* The stack usage info is finalized during prologue expansion. */ - if (flag_stack_usage_info || flag_callgraph_info) - output_stack_usage (); -- -- return 0; - } - - /* Record a final call to CALLEE at LOCATION. */ -@@ -6626,7 +6622,8 @@ public: - /* opt_pass methods: */ - unsigned int execute (function * fun) final override - { -- return rest_of_handle_thread_prologue_and_epilogue (fun); -+ rest_of_handle_thread_prologue_and_epilogue (fun); -+ return 0; - } - - }; // class pass_thread_prologue_and_epilogue -diff --git a/gcc/function.h b/gcc/function.h -index a53fb24d2..4e8131706 100644 ---- a/gcc/function.h -+++ b/gcc/function.h -@@ -653,11 +653,11 @@ extern rtx get_hard_reg_initial_val (machine_mode, unsigned int); - extern rtx has_hard_reg_initial_val (machine_mode, unsigned int); - - /* Called from gimple_expand_cfg. 
*/ --extern unsigned int emit_initial_value_sets (void); -+extern void emit_initial_value_sets (void); - - extern bool initial_value_entry (int i, rtx *, rtx *); - extern void instantiate_decl_rtl (rtx x); --extern int aggregate_value_p (const_tree, const_tree); -+extern bool aggregate_value_p (const_tree, const_tree); - extern bool use_register_for_decl (const_tree); - extern gimple_seq gimplify_parameters (gimple_seq *); - extern void locate_and_pad_parm (machine_mode, tree, int, int, int, -@@ -698,9 +698,9 @@ extern void clobber_return_register (void); - extern void expand_function_end (void); - extern rtx get_arg_pointer_save_area (void); - extern void maybe_copy_prologue_epilogue_insn (rtx, rtx); --extern int prologue_contains (const rtx_insn *); --extern int epilogue_contains (const rtx_insn *); --extern int prologue_epilogue_contains (const rtx_insn *); -+extern bool prologue_contains (const rtx_insn *); -+extern bool epilogue_contains (const rtx_insn *); -+extern bool prologue_epilogue_contains (const rtx_insn *); - extern void record_prologue_seq (rtx_insn *); - extern void record_epilogue_seq (rtx_insn *); - extern void emit_return_into_block (bool simple_p, basic_block bb); --- -2.33.0 - diff --git a/0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch b/0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch deleted file mode 100644 index 13dc0e3..0000000 --- a/0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch +++ /dev/null @@ -1,233 +0,0 @@ -From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 09:28:46 +0000 -Subject: [PATCH 049/157] [Backport][SME] Allow prologues and epilogues to be - inserted later - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702 - -Arm's SME adds a new processor mode called streaming mode. 
-This mode enables some new (matrix-oriented) instructions and -disables several existing groups of instructions, such as most -Advanced SIMD vector instructions and a much smaller set of SVE -instructions. It can also change the current vector length. - -There are instructions to switch in and out of streaming mode. -However, their effect on the ISA and vector length can't be represented -directly in RTL, so they need to be emitted late in the pass pipeline, -close to md_reorg. - -It's sometimes the responsibility of the prologue and epilogue to -switch modes, which means we need to emit the prologue and epilogue -sequences late as well. (This loses shrink-wrapping and scheduling -opportunities, but that's a price worth paying.) - -This patch therefore adds a target hook for forcing prologue -and epilogue insertion to happen later in the pipeline. - -gcc/ - * target.def (use_late_prologue_epilogue): New hook. - * doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE. - * doc/tm.texi: Regenerate. - * passes.def (pass_late_thread_prologue_and_epilogue): New pass. - * tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare. - * function.cc (pass_thread_prologue_and_epilogue::gate): New function. - (pass_data_late_thread_prologue_and_epilogue): New pass variable. - (pass_late_thread_prologue_and_epilogue): New pass class. - (make_pass_late_thread_prologue_and_epilogue): New function. ---- - gcc/doc/tm.texi | 19 ++++++++++++++++++ - gcc/doc/tm.texi.in | 2 ++ - gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++ - gcc/passes.def | 3 +++ - gcc/target.def | 21 +++++++++++++++++++ - gcc/tree-pass.h | 2 ++ - 6 files changed, 97 insertions(+) - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 5f0972356..d930d233d 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed - to by @var{ce_info}. 
- @end defmac - -+@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE () -+Return true if the current function's prologue and epilogue should -+be emitted late in the pass pipeline, instead of at the usual point. -+ -+Normally, the prologue and epilogue sequences are introduced soon after -+register allocation is complete. The advantage of this approach is that -+it allows the prologue and epilogue instructions to be optimized and -+scheduled with other code in the function. However, some targets -+require the prologue and epilogue to be the first and last sequences -+executed by the function, with no variation allowed. This hook should -+return true on such targets. -+ -+The default implementation returns false, which is correct for most -+targets. The hook should only return true if there is a specific -+target limitation that cannot be described in RTL. For example, -+the hook might return true if the prologue and epilogue need to switch -+between instruction sets. -+@end deftypefn -+ - @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) - If non-null, this hook performs a target-specific pass over the - instruction stream. The compiler will run it at all optimization levels, -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index fcab21744..19eabec48 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed - to by @var{ce_info}. - @end defmac - -+@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE -+ - @hook TARGET_MACHINE_DEPENDENT_REORG - - @hook TARGET_INIT_BUILTINS -diff --git a/gcc/function.cc b/gcc/function.cc -index fc8eb5812..7c90b5f23 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see - #include "function-abi.h" - #include "value-range.h" - #include "gimple-range.h" -+#include "insn-attr.h" - - /* So we can assign to cfun in this file. 
*/ - #undef cfun -@@ -6620,6 +6621,11 @@ public: - {} - - /* opt_pass methods: */ -+ bool gate (function *) final override -+ { -+ return !targetm.use_late_prologue_epilogue (); -+ } -+ - unsigned int execute (function * fun) final override - { - rest_of_handle_thread_prologue_and_epilogue (fun); -@@ -6628,6 +6634,44 @@ public: - - }; // class pass_thread_prologue_and_epilogue - -+const pass_data pass_data_late_thread_prologue_and_epilogue = -+{ -+ RTL_PASS, /* type */ -+ "late_pro_and_epilogue", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */ -+}; -+ -+class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass -+{ -+public: -+ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ bool gate (function *) final override -+ { -+ return targetm.use_late_prologue_epilogue (); -+ } -+ -+ unsigned int execute (function *fn) final override -+ { -+ /* It's not currently possible to have both delay slots and -+ late prologue/epilogue, since the latter has to run before -+ the former, and the former won't honor whatever restrictions -+ the latter is trying to enforce. 
*/ -+ gcc_assert (!DELAY_SLOTS); -+ rest_of_handle_thread_prologue_and_epilogue (fn); -+ return 0; -+ } -+}; // class pass_late_thread_prologue_and_epilogue -+ - } // anon namespace - - rtl_opt_pass * -@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt) - return new pass_thread_prologue_and_epilogue (ctxt); - } - -+rtl_opt_pass * -+make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt) -+{ -+ return new pass_late_thread_prologue_and_epilogue (ctxt); -+} -+ - namespace { - - const pass_data pass_data_zero_call_used_regs = -diff --git a/gcc/passes.def b/gcc/passes.def -index cdc600298..8797f166f 100644 ---- a/gcc/passes.def -+++ b/gcc/passes.def -@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see - NEXT_PASS (pass_stack_regs_run); - POP_INSERT_PASSES () - POP_INSERT_PASSES () -+ NEXT_PASS (pass_late_thread_prologue_and_epilogue); -+ /* No target-independent code motion is allowed beyond this point, -+ excepting the legacy delayed-branch pass. */ - NEXT_PASS (pass_late_compilation); - PUSH_INSERT_PASSES_WITHIN (pass_late_compilation) - NEXT_PASS (pass_zero_call_used_regs); -diff --git a/gcc/target.def b/gcc/target.def -index 4d77c1523..fd4899612 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.", - machine_mode, (machine_mode m1, machine_mode m2), - default_cc_modes_compatible) - -+DEFHOOK -+(use_late_prologue_epilogue, -+ "Return true if the current function's prologue and epilogue should\n\ -+be emitted late in the pass pipeline, instead of at the usual point.\n\ -+\n\ -+Normally, the prologue and epilogue sequences are introduced soon after\n\ -+register allocation is complete. The advantage of this approach is that\n\ -+it allows the prologue and epilogue instructions to be optimized and\n\ -+scheduled with other code in the function. 
However, some targets\n\ -+require the prologue and epilogue to be the first and last sequences\n\ -+executed by the function, with no variation allowed. This hook should\n\ -+return true on such targets.\n\ -+\n\ -+The default implementation returns false, which is correct for most\n\ -+targets. The hook should only return true if there is a specific\n\ -+target limitation that cannot be described in RTL. For example,\n\ -+the hook might return true if the prologue and epilogue need to switch\n\ -+between instruction sets.", -+ bool, (), -+ hook_bool_void_false) -+ - /* Do machine-dependent code transformations. Called just before - delayed-branch scheduling. */ - DEFHOOK -diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h -index 34e60bc38..1c983ef71 100644 ---- a/gcc/tree-pass.h -+++ b/gcc/tree-pass.h -@@ -612,6 +612,8 @@ extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context - *ctxt); -+extern rtl_opt_pass *make_pass_late_thread_prologue_and_epilogue (gcc::context -+ *ctxt); - extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt); --- -2.33.0 - diff --git a/0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch b/0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch deleted file mode 100644 index d892432..0000000 --- a/0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch +++ /dev/null @@ -1,239 +0,0 @@ -From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 09:35:57 +0000 -Subject: [PATCH 050/157] [Backport][SME] Add a target hook for sibcall - epilogues - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a 
- -Epilogues for sibling calls are generated using the -sibcall_epilogue pattern. One disadvantage of this approach -is that the target doesn't know which call the epilogue is for, -even though the code that generates the pattern has the call -to hand. - -Although call instructions are currently rtxes, and so could be -passed as an operand to the pattern, the main point of introducing -rtx_insn was to move towards separating the rtx and insn types -(a good thing IMO). There also isn't an existing practice of -passing genuine instructions (as opposed to labels) to -instruction patterns. - -This patch therefore adds a hook that can be defined as an -alternative to sibcall_epilogue. The advantage is that it -can be passed the call; the disadvantage is that it can't -use .md conveniences like generating instructions from -textual patterns (although most epilogues are too complex -to benefit much from that anyway). - -gcc/ - * doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL. - * doc/tm.texi: Regenerate. - * target.def (emit_epilogue_for_sibcall): New hook. - * calls.cc (can_implement_as_sibling_call_p): Use it. - * function.cc (thread_prologue_and_epilogue_insns): Likewise. - (reposition_prologue_and_epilogue_notes): Likewise. - * config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take - an rtx_call_insn * rather than a bool. - * config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise. - (TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define. - * config/aarch64/aarch64.md (epilogue): Update call. - (sibcall_epilogue): Delete. 
---- - gcc/calls.cc | 3 ++- - gcc/config/aarch64/aarch64-protos.h | 2 +- - gcc/config/aarch64/aarch64.cc | 11 +++++++---- - gcc/config/aarch64/aarch64.md | 11 +---------- - gcc/doc/tm.texi | 8 ++++++++ - gcc/doc/tm.texi.in | 2 ++ - gcc/function.cc | 15 +++++++++++++-- - gcc/target.def | 9 +++++++++ - 8 files changed, 43 insertions(+), 18 deletions(-) - -diff --git a/gcc/calls.cc b/gcc/calls.cc -index 4d0bc45be..c1db66883 100644 ---- a/gcc/calls.cc -+++ b/gcc/calls.cc -@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp, - tree addr, - const args_size &args_size) - { -- if (!targetm.have_sibcall_epilogue ()) -+ if (!targetm.have_sibcall_epilogue () -+ && !targetm.emit_epilogue_for_sibcall) - { - maybe_complain_about_tail_call - (exp, -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 86e444a60..97984f3ab 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *); - const char * aarch64_output_probe_stack_range (rtx, rtx); - const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); - void aarch64_err_no_fpadvsimd (machine_mode); --void aarch64_expand_epilogue (bool); -+void aarch64_expand_epilogue (rtx_call_insn *); - rtx aarch64_ptrue_all (unsigned int); - opt_machine_mode aarch64_ptrue_all_mode (rtx); - rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index fd1114b52..055b436b1 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void) - from a deallocated stack, and we optimize the unwind records by - emitting them all together if possible. 
*/ - void --aarch64_expand_epilogue (bool for_sibcall) -+aarch64_expand_epilogue (rtx_call_insn *sibcall) - { - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; -@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall) - explicitly authenticate. - */ - if (aarch64_return_address_signing_enabled () -- && (for_sibcall || !TARGET_ARMV8_3)) -+ && (sibcall || !TARGET_ARMV8_3)) - { - switch (aarch64_ra_sign_key) - { -@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall) - } - - /* Stack adjustment for exception handler. */ -- if (crtl->calls_eh_return && !for_sibcall) -+ if (crtl->calls_eh_return && !sibcall) - { - /* We need to unwind the stack by the offset computed by - EH_RETURN_STACKADJ_RTX. We have already reset the CFA -@@ -10223,7 +10223,7 @@ aarch64_expand_epilogue (bool for_sibcall) - } - - emit_use (gen_rtx_REG (DImode, LR_REGNUM)); -- if (!for_sibcall) -+ if (!sibcall) - emit_jump_insn (ret_rtx); - } - -@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_HAVE_SHADOW_CALL_STACK - #define TARGET_HAVE_SHADOW_CALL_STACK true - -+#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL -+#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-aarch64.h" -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 7267a74d6..a78476c8a 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -871,16 +871,7 @@ - [(clobber (const_int 0))] - "" - " -- aarch64_expand_epilogue (false); -- DONE; -- " --) -- --(define_expand "sibcall_epilogue" -- [(clobber (const_int 0))] -- "" -- " -- aarch64_expand_epilogue (true); -+ aarch64_expand_epilogue (nullptr); - DONE; - " - ) -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index d930d233d..369f4b8da 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -11703,6 +11703,14 @@ the 
hook might return true if the prologue and epilogue need to switch - between instruction sets. - @end deftypefn - -+@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call}) -+If defined, this hook emits an epilogue sequence for sibling (tail) -+call instruction @var{call}. Another way of providing epilogues -+for sibling calls is to define the @code{sibcall_epilogue} instruction -+pattern; the main advantage of this hook over the pattern is that it -+has access to the call instruction. -+@end deftypefn -+ - @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void) - If non-null, this hook performs a target-specific pass over the - instruction stream. The compiler will run it at all optimization levels, -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index 19eabec48..748b0777a 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -7710,6 +7710,8 @@ to by @var{ce_info}. - - @hook TARGET_USE_LATE_PROLOGUE_EPILOGUE - -+@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL -+ - @hook TARGET_MACHINE_DEPENDENT_REORG - - @hook TARGET_INIT_BUILTINS -diff --git a/gcc/function.cc b/gcc/function.cc -index 7c90b5f23..ddab43ca4 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void) - if (!(CALL_P (insn) && SIBLING_CALL_P (insn))) - continue; - -- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ()) -+ rtx_insn *ep_seq; -+ if (targetm.emit_epilogue_for_sibcall) -+ { -+ start_sequence (); -+ targetm.emit_epilogue_for_sibcall (as_a (insn)); -+ ep_seq = get_insns (); -+ end_sequence (); -+ } -+ else -+ ep_seq = targetm.gen_sibcall_epilogue (); -+ if (ep_seq) - { - start_sequence (); - emit_note (NOTE_INSN_EPILOGUE_BEG); -@@ -6259,7 +6269,8 @@ reposition_prologue_and_epilogue_notes (void) - { - if (!targetm.have_prologue () - && !targetm.have_epilogue () -- && !targetm.have_sibcall_epilogue ()) -+ && !targetm.have_sibcall_epilogue () -+ && 
!targetm.emit_epilogue_for_sibcall) - return; - - /* Since the hash table is created on demand, the fact that it is -diff --git a/gcc/target.def b/gcc/target.def -index fd4899612..cf9f96eba 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -4141,6 +4141,15 @@ between instruction sets.", - bool, (), - hook_bool_void_false) - -+DEFHOOK -+(emit_epilogue_for_sibcall, -+ "If defined, this hook emits an epilogue sequence for sibling (tail)\n\ -+call instruction @var{call}. Another way of providing epilogues\n\ -+for sibling calls is to define the @code{sibcall_epilogue} instruction\n\ -+pattern; the main advantage of this hook over the pattern is that it\n\ -+has access to the call instruction.", -+ void, (rtx_call_insn *call), NULL) -+ - /* Do machine-dependent code transformations. Called just before - delayed-branch scheduling. */ - DEFHOOK --- -2.33.0 - diff --git a/0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch b/0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch deleted file mode 100644 index 4c71f2a..0000000 --- a/0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch +++ /dev/null @@ -1,461 +0,0 @@ -From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 09:44:52 +0000 -Subject: [PATCH 051/157] [Backport][SME] Add a new target hook: - TARGET_START_CALL_ARGS - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2 - -We have the following two hooks into the call expansion code: - -- TARGET_CALL_ARGS is called for each argument before arguments - are moved into hard registers. - -- TARGET_END_CALL_ARGS is called after the end of the call - sequence (specifically, after any return value has been - moved to a pseudo). - -This patch adds a TARGET_START_CALL_ARGS hook that is called before -the TARGET_CALL_ARGS sequence. 
This means that TARGET_START_CALL_REGS -and TARGET_END_CALL_REGS bracket the region in which argument registers -might be live. They also bracket a region in which the only call -emiitted by target-independent code is the call to the target function -itself. (For example, TARGET_START_CALL_ARGS happens after any use of -memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any -use of memcpy to copy the result.) - -Also, the patch adds the cumulative argument structure as an argument -to the hooks, so that the target can use it to record and retrieve -information about the call as a whole. - -The TARGET_CALL_ARGS docs said: - - While generating RTL for a function call, this target hook is invoked once - for each argument passed to the function, either a register returned by - ``TARGET_FUNCTION_ARG`` or a memory location. It is called just -- before the point where argument registers are stored. - -The last bit was true for normal calls, but for libcalls the hook was -invoked earlier, before stack arguments have been copied. I don't think -this caused a practical difference for nvptx (the only port to use the -hooks) since I wouldn't expect any libcalls to take stack parameters. - -gcc/ - * doc/tm.texi.in: Add TARGET_START_CALL_ARGS. - * doc/tm.texi: Regenerate. - * target.def (start_call_args): New hook. - (call_args, end_call_args): Add a parameter for the cumulative - argument information. - * hooks.h (hook_void_rtx_tree): Delete. - * hooks.cc (hook_void_rtx_tree): Likewise. - * targhooks.h (hook_void_CUMULATIVE_ARGS): Declare. - (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. - * targhooks.cc (hook_void_CUMULATIVE_ARGS): New function. - (hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise. - * calls.cc (expand_call): Call start_call_args before computing - and storing stack parameters. Pass the cumulative argument - information to call_args and end_call_args. - (emit_library_call_value_1): Likewise. 
- * config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative - argument parameter. - (nvptx_end_call_args): Likewise. ---- - gcc/calls.cc | 61 +++++++++++++++++++++------------------ - gcc/config/nvptx/nvptx.cc | 4 +-- - gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++------- - gcc/doc/tm.texi.in | 2 ++ - gcc/hooks.cc | 5 ---- - gcc/hooks.h | 1 - - gcc/target.def | 59 +++++++++++++++++++++++++++++-------- - gcc/targhooks.cc | 10 +++++++ - gcc/targhooks.h | 5 ++-- - 9 files changed, 140 insertions(+), 60 deletions(-) - -diff --git a/gcc/calls.cc b/gcc/calls.cc -index c1db66883..4a8535cc6 100644 ---- a/gcc/calls.cc -+++ b/gcc/calls.cc -@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore) - sibcall_failure = 1; - } - -+ /* Set up the next argument register. For sibling calls on machines -+ with register windows this should be the incoming register. */ -+ if (pass == 0) -+ next_arg_reg = targetm.calls.function_incoming_arg -+ (args_so_far, function_arg_info::end_marker ()); -+ else -+ next_arg_reg = targetm.calls.function_arg -+ (args_so_far, function_arg_info::end_marker ()); -+ -+ targetm.calls.start_call_args (args_so_far); -+ - bool any_regs = false; - for (i = 0; i < num_actuals; i++) - if (args[i].reg != NULL_RTX) - { - any_regs = true; -- targetm.calls.call_args (args[i].reg, funtype); -+ targetm.calls.call_args (args_so_far, args[i].reg, funtype); - } - if (!any_regs) -- targetm.calls.call_args (pc_rtx, funtype); -+ targetm.calls.call_args (args_so_far, pc_rtx, funtype); - - /* Figure out the register where the value, if any, will come back. */ - valreg = 0; -@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore) - later safely search backwards to find the CALL_INSN. */ - before_call = get_last_insn (); - -- /* Set up next argument register. For sibling calls on machines -- with register windows this should be the incoming register. 
*/ -- if (pass == 0) -- next_arg_reg = targetm.calls.function_incoming_arg -- (args_so_far, function_arg_info::end_marker ()); -- else -- next_arg_reg = targetm.calls.function_arg -- (args_so_far, function_arg_info::end_marker ()); -- - if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) - { - int arg_nr = return_flags & ERF_RETURN_ARG_MASK; -@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore) - for (i = 0; i < num_actuals; ++i) - free (args[i].aligned_regs); - -- targetm.calls.end_call_args (); -+ targetm.calls.end_call_args (args_so_far); - - insns = get_insns (); - end_sequence (); -@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - } - #endif - -- /* When expanding a normal call, args are stored in push order, -- which is the reverse of what we have here. */ -- bool any_regs = false; -- for (int i = nargs; i-- > 0; ) -- if (argvec[i].reg != NULL_RTX) -- { -- targetm.calls.call_args (argvec[i].reg, NULL_TREE); -- any_regs = true; -- } -- if (!any_regs) -- targetm.calls.call_args (pc_rtx, NULL_TREE); -+ rtx call_cookie -+ = targetm.calls.function_arg (args_so_far, -+ function_arg_info::end_marker ()); - - /* Push the args that need to be pushed. */ - -@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - - fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); - -+ targetm.calls.start_call_args (args_so_far); -+ -+ /* When expanding a normal call, args are stored in push order, -+ which is the reverse of what we have here. */ -+ bool any_regs = false; -+ for (int i = nargs; i-- > 0; ) -+ if (argvec[i].reg != NULL_RTX) -+ { -+ targetm.calls.call_args (args_so_far, argvec[i].reg, NULL_TREE); -+ any_regs = true; -+ } -+ if (!any_regs) -+ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE); -+ - /* Now load any reg parms into their regs. 
*/ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments -@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - get_identifier (XSTR (orgfun, 0)), - build_function_type (tfom, NULL_TREE), - original_args_size.constant, args_size.constant, -- struct_value_size, -- targetm.calls.function_arg (args_so_far, -- function_arg_info::end_marker ()), -- valreg, -+ struct_value_size, call_cookie, valreg, - old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); - - if (flag_ipa_ra) -@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg)); - } - -- targetm.calls.end_call_args (); -+ targetm.calls.end_call_args (args_so_far); - - /* For calls to `setjmp', etc., inform function.cc:setjmp_warnings - that it should complain if nonvolatile values are live. For -diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc -index 3634a49de..7f2103ba6 100644 ---- a/gcc/config/nvptx/nvptx.cc -+++ b/gcc/config/nvptx/nvptx.cc -@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void) - argument to the next call. */ - - static void --nvptx_call_args (rtx arg, tree fntype) -+nvptx_call_args (cumulative_args_t, rtx arg, tree fntype) - { - if (!cfun->machine->doing_call) - { -@@ -1808,7 +1808,7 @@ nvptx_call_args (rtx arg, tree fntype) - information we recorded. */ - - static void --nvptx_end_call_args (void) -+nvptx_end_call_args (cumulative_args_t) - { - cfun->machine->doing_call = false; - free_EXPR_LIST_list (&cfun->machine->call_args); -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 369f4b8da..357c29a4d 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -5392,26 +5392,59 @@ except the last are treated as named. - You need not define this hook if it always returns @code{false}. 
- @end deftypefn - --@deftypefn {Target Hook} void TARGET_CALL_ARGS (rtx, @var{tree}) -+@deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t @var{complete_args}) -+This target hook is invoked while generating RTL for a function call, -+after the argument values have been computed, and after stack arguments -+have been initialized, but before register arguments have been moved into -+their ABI-defined hard register locations. It precedes calls to the related -+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}. -+The significance of this position in the call expansion is that: -+ -+@itemize @bullet -+@item -+No argument registers are live. -+@item -+Although a call sequence can in general involve subcalls (such as using -+@code{memcpy} to copy large arguments), no such subcall will occur between -+the call to this hook and the generation of the main call instruction. -+@end itemize -+ -+The single argument @var{complete_args} is the state of the target -+function's cumulative argument information after the final call to -+@code{TARGET_FUNCTION_ARG}. -+ -+The hook can be used for things like switching processor mode, in cases -+where different calls need different processor modes. Most ports do not -+need to implement anything for this hook. -+@end deftypefn -+ -+@deftypefn {Target Hook} void TARGET_CALL_ARGS (cumulative_args_t @var{complete_args}, rtx @var{loc}, tree @var{type}) - While generating RTL for a function call, this target hook is invoked once - for each argument passed to the function, either a register returned by - @code{TARGET_FUNCTION_ARG} or a memory location. It is called just --before the point where argument registers are stored. The type of the --function to be called is also passed as the second argument; it is --@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is --invoked just after the code to copy the return reg has been emitted. 
--This functionality can be used to perform special setup of call argument --registers if a target needs it. -+before the point where argument registers are stored. -+ -+@var{complete_args} is the state of the target function's cumulative -+argument information after the final call to @code{TARGET_FUNCTION_ARG}. -+@var{loc} is the location of the argument. @var{type} is the type of -+the function being called, or @code{NULL_TREE} for libcalls. -+ - For functions without arguments, the hook is called once with @code{pc_rtx} - passed instead of an argument register. --Most ports do not need to implement anything for this hook. -+ -+This functionality can be used to perform special setup of call argument -+registers, if a target needs it. Most ports do not need to implement -+anything for this hook. - @end deftypefn - --@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (void) -+@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (cumulative_args_t @var{complete_args}) - This target hook is invoked while generating RTL for a function call, - just after the point where the return reg is copied into a pseudo. It - signals that all the call argument and return registers for the just --emitted call are now no longer in use. -+emitted call are now no longer in use. @var{complete_args} is the -+state of the target function's cumulative argument information after -+the final call to @code{TARGET_FUNCTION_ARG}. -+ - Most ports do not need to implement anything for this hook. 
- @end deftypefn - -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index 748b0777a..4ebc9afbf 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -3774,6 +3774,8 @@ These machine description macros help implement varargs: - - @hook TARGET_STRICT_ARGUMENT_NAMING - -+@hook TARGET_START_CALL_ARGS -+ - @hook TARGET_CALL_ARGS - - @hook TARGET_END_CALL_ARGS -diff --git a/gcc/hooks.cc b/gcc/hooks.cc -index b29233f4f..0f4e7ce10 100644 ---- a/gcc/hooks.cc -+++ b/gcc/hooks.cc -@@ -280,11 +280,6 @@ hook_void_FILEptr_tree (FILE *, tree) - { - } - --void --hook_void_rtx_tree (rtx, tree) --{ --} -- - void - hook_void_constcharptr (const char *) - { -diff --git a/gcc/hooks.h b/gcc/hooks.h -index 1056e1e9e..e2a742f43 100644 ---- a/gcc/hooks.h -+++ b/gcc/hooks.h -@@ -83,7 +83,6 @@ extern void hook_void_FILEptr_constcharptr (FILE *, const char *); - extern void hook_void_FILEptr_constcharptr_const_tree (FILE *, const char *, - const_tree); - extern bool hook_bool_FILEptr_rtx_false (FILE *, rtx); --extern void hook_void_rtx_tree (rtx, tree); - extern void hook_void_FILEptr_tree (FILE *, tree); - extern void hook_void_tree (tree); - extern void hook_void_tree_treeptr (tree, tree *); -diff --git a/gcc/target.def b/gcc/target.def -index cf9f96eba..a57e51b0d 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -4784,32 +4784,67 @@ not generate any instructions in this case.", - int *pretend_args_size, int second_time), - default_setup_incoming_varargs) - -+DEFHOOK -+(start_call_args, -+ "This target hook is invoked while generating RTL for a function call,\n\ -+after the argument values have been computed, and after stack arguments\n\ -+have been initialized, but before register arguments have been moved into\n\ -+their ABI-defined hard register locations. 
It precedes calls to the related\n\ -+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.\n\ -+The significance of this position in the call expansion is that:\n\ -+\n\ -+@itemize @bullet\n\ -+@item\n\ -+No argument registers are live.\n\ -+@item\n\ -+Although a call sequence can in general involve subcalls (such as using\n\ -+@code{memcpy} to copy large arguments), no such subcall will occur between\n\ -+the call to this hook and the generation of the main call instruction.\n\ -+@end itemize\n\ -+\n\ -+The single argument @var{complete_args} is the state of the target\n\ -+function's cumulative argument information after the final call to\n\ -+@code{TARGET_FUNCTION_ARG}.\n\ -+\n\ -+The hook can be used for things like switching processor mode, in cases\n\ -+where different calls need different processor modes. Most ports do not\n\ -+need to implement anything for this hook.", -+ void, (cumulative_args_t complete_args), -+ hook_void_CUMULATIVE_ARGS) -+ - DEFHOOK - (call_args, - "While generating RTL for a function call, this target hook is invoked once\n\ - for each argument passed to the function, either a register returned by\n\ - @code{TARGET_FUNCTION_ARG} or a memory location. It is called just\n\ --before the point where argument registers are stored. The type of the\n\ --function to be called is also passed as the second argument; it is\n\ --@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is\n\ --invoked just after the code to copy the return reg has been emitted.\n\ --This functionality can be used to perform special setup of call argument\n\ --registers if a target needs it.\n\ -+before the point where argument registers are stored.\n\ -+\n\ -+@var{complete_args} is the state of the target function's cumulative\n\ -+argument information after the final call to @code{TARGET_FUNCTION_ARG}.\n\ -+@var{loc} is the location of the argument. 
@var{type} is the type of\n\ -+the function being called, or @code{NULL_TREE} for libcalls.\n\ -+\n\ - For functions without arguments, the hook is called once with @code{pc_rtx}\n\ - passed instead of an argument register.\n\ --Most ports do not need to implement anything for this hook.", -- void, (rtx, tree), -- hook_void_rtx_tree) -+\n\ -+This functionality can be used to perform special setup of call argument\n\ -+registers, if a target needs it. Most ports do not need to implement\n\ -+anything for this hook.", -+ void, (cumulative_args_t complete_args, rtx loc, tree type), -+ hook_void_CUMULATIVE_ARGS_rtx_tree) - - DEFHOOK - (end_call_args, - "This target hook is invoked while generating RTL for a function call,\n\ - just after the point where the return reg is copied into a pseudo. It\n\ - signals that all the call argument and return registers for the just\n\ --emitted call are now no longer in use.\n\ -+emitted call are now no longer in use. @var{complete_args} is the\n\ -+state of the target function's cumulative argument information after\n\ -+the final call to @code{TARGET_FUNCTION_ARG}.\n\ -+\n\ - Most ports do not need to implement anything for this hook.", -- void, (void), -- hook_void_void) -+ void, (cumulative_args_t complete_args), -+ hook_void_CUMULATIVE_ARGS) - - DEFHOOK - (push_argument, -diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc -index 399d6f874..c88afa5db 100644 ---- a/gcc/targhooks.cc -+++ b/gcc/targhooks.cc -@@ -772,12 +772,22 @@ hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t, - return 0; - } - -+void -+hook_void_CUMULATIVE_ARGS (cumulative_args_t) -+{ -+} -+ - void - hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED, - tree ATTRIBUTE_UNUSED) - { - } - -+void -+hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree) -+{ -+} -+ - /* Default implementation of TARGET_PUSH_ARGUMENT. 
*/ - - bool -diff --git a/gcc/targhooks.h b/gcc/targhooks.h -index ecce55ebe..c6e12fc2e 100644 ---- a/gcc/targhooks.h -+++ b/gcc/targhooks.h -@@ -138,8 +138,9 @@ extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true - (cumulative_args_t, const function_arg_info &); - extern int hook_int_CUMULATIVE_ARGS_arg_info_0 - (cumulative_args_t, const function_arg_info &); --extern void hook_void_CUMULATIVE_ARGS_tree -- (cumulative_args_t, tree); -+extern void hook_void_CUMULATIVE_ARGS (cumulative_args_t); -+extern void hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t, tree); -+extern void hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree); - extern const char *hook_invalid_arg_for_unprototyped_fn - (const_tree, const_tree, const_tree); - extern void default_function_arg_advance --- -2.33.0 - diff --git a/0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch b/0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch deleted file mode 100644 index cb06751..0000000 --- a/0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch +++ /dev/null @@ -1,490 +0,0 @@ -From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 09:52:41 +0000 -Subject: [PATCH 052/157] [Backport][SME] Allow targets to add USEs to asms - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba - -Arm's SME has an array called ZA that for inline asm purposes -is effectively a form of special-purpose memory. It doesn't -have an associated storage type and so can't be passed and -returned in normal C/C++ objects. - -We'd therefore like "za" in a clobber list to mean that an inline -asm can read from and write to ZA. (Just reading or writing -individually is unlikely to be useful, but we could add syntax -for that too if necessary.) - -There is currently a TARGET_MD_ASM_ADJUST target hook that allows -targets to add clobbers to an asm instruction. 
This patch -extends that to allow targets to add USEs as well. - -gcc/ - * target.def (md_asm_adjust): Add a uses parameter. - * doc/tm.texi: Regenerate. - * cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust. - Handle any USEs created by the target. - (expand_asm_stmt): Likewise. - * recog.cc (asm_noperands): Handle asms with USEs. - (decode_asm_operands): Likewise. - * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses - parameter. - * config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise. - * config/arm/arm.cc (thumb1_md_asm_adjust): Likewise. - * config/avr/avr.cc (avr_md_asm_adjust): Likewise. - * config/cris/cris.cc (cris_md_asm_adjust): Likewise. - * config/i386/i386.cc (ix86_md_asm_adjust): Likewise. - * config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise. - * config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise. - * config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise. - * config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise. - * config/s390/s390.cc (s390_md_asm_adjust): Likewise. - * config/vax/vax.cc (vax_md_asm_adjust): Likewise. - * config/visium/visium.cc (visium_md_asm_adjust): Likewise. 
---- - gcc/cfgexpand.cc | 37 +++++++++++++++++++--------- - gcc/config/arm/aarch-common-protos.h | 2 +- - gcc/config/arm/aarch-common.cc | 3 ++- - gcc/config/arm/arm.cc | 5 ++-- - gcc/config/avr/avr.cc | 1 + - gcc/config/cris/cris.cc | 6 +++-- - gcc/config/i386/i386.cc | 5 ++-- - gcc/config/mn10300/mn10300.cc | 3 ++- - gcc/config/nds32/nds32.cc | 4 +-- - gcc/config/pdp11/pdp11.cc | 6 +++-- - gcc/config/rs6000/rs6000.cc | 3 ++- - gcc/config/s390/s390.cc | 3 ++- - gcc/config/vax/vax.cc | 4 ++- - gcc/config/visium/visium.cc | 5 ++-- - gcc/doc/tm.texi | 5 ++-- - gcc/recog.cc | 20 ++++++++++----- - gcc/target.def | 5 ++-- - 17 files changed, 77 insertions(+), 40 deletions(-) - -diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc -index 4691355aa..5401a4ebd 100644 ---- a/gcc/cfgexpand.cc -+++ b/gcc/cfgexpand.cc -@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus) - auto_vec input_rvec, output_rvec; - auto_vec input_mode; - auto_vec constraints; -+ auto_vec use_rvec; - auto_vec clobber_rvec; - HARD_REG_SET clobbered_regs; - CLEAR_HARD_REG_SET (clobbered_regs); -@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus) - - if (targetm.md_asm_adjust) - targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, -- constraints, clobber_rvec, clobbered_regs, -- locus); -+ constraints, use_rvec, clobber_rvec, -+ clobbered_regs, locus); - - asm_op = body; - nclobbers = clobber_rvec.length (); -- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers)); -+ auto nuses = use_rvec.length (); -+ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers)); - -- XVECEXP (body, 0, 0) = asm_op; -- for (i = 0; i < nclobbers; i++) -- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rvec[i]); -+ i = 0; -+ XVECEXP (body, 0, i++) = asm_op; -+ for (rtx use : use_rvec) -+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); -+ for (rtx clobber : clobber_rvec) -+ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, 
clobber); - } - - emit_insn (body); -@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt) - maintaining source-level compatibility means automatically clobbering - the flags register. */ - rtx_insn *after_md_seq = NULL; -+ auto_vec use_rvec; - if (targetm.md_asm_adjust) - after_md_seq - = targetm.md_asm_adjust (output_rvec, input_rvec, input_mode, -- constraints, clobber_rvec, clobbered_regs, -- locus); -+ constraints, use_rvec, clobber_rvec, -+ clobbered_regs, locus); - - /* Do not allow the hook to change the output and input count, - lest it mess up the operand numbering. */ -@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt) - gcc_assert (input_rvec.length() == ninputs); - gcc_assert (constraints.length() == noutputs + ninputs); - -- /* But it certainly can adjust the clobbers. */ -+ /* But it certainly can adjust the uses and clobbers. */ -+ unsigned nuses = use_rvec.length (); - unsigned nclobbers = clobber_rvec.length (); - - /* Third pass checks for easy conflicts. */ -@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt) - ARGVEC CONSTRAINTS OPNAMES)) - If there is more than one, put them inside a PARALLEL. */ - -- if (noutputs == 0 && nclobbers == 0) -+ if (noutputs == 0 && nuses == 0 && nclobbers == 0) - { - /* No output operands: put in a raw ASM_OPERANDS rtx. */ - if (nlabels > 0) -@@ -3535,7 +3542,7 @@ expand_asm_stmt (gasm *stmt) - else - emit_insn (body); - } -- else if (noutputs == 1 && nclobbers == 0) -+ else if (noutputs == 1 && nuses == 0 && nclobbers == 0) - { - ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0]; - if (nlabels > 0) -@@ -3551,7 +3558,8 @@ expand_asm_stmt (gasm *stmt) - if (num == 0) - num = 1; - -- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers)); -+ body = gen_rtx_PARALLEL (VOIDmode, -+ rtvec_alloc (num + nuses + nclobbers)); - - /* For each output operand, store a SET. 
*/ - for (i = 0; i < noutputs; ++i) -@@ -3578,6 +3586,11 @@ expand_asm_stmt (gasm *stmt) - if (i == 0) - XVECEXP (body, 0, i++) = obody; - -+ /* Add the uses specified by the target hook. No checking should -+ be needed since this doesn't come directly from user code. */ -+ for (rtx use : use_rvec) -+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use); -+ - /* Store (clobber REG) for each clobbered register specified. */ - for (unsigned j = 0; j < nclobbers; ++j) - { -diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h -index ae0465159..3b525c174 100644 ---- a/gcc/config/arm/aarch-common-protos.h -+++ b/gcc/config/arm/aarch-common-protos.h -@@ -149,7 +149,7 @@ struct cpu_cost_table - - rtx_insn *arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, - vec & /*input_modes*/, -- vec &constraints, -+ vec &constraints, vec &, - vec &clobbers, HARD_REG_SET &clobbered_regs, - location_t loc); - -diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc -index 04a53d750..365cfc140 100644 ---- a/gcc/config/arm/aarch-common.cc -+++ b/gcc/config/arm/aarch-common.cc -@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer) - rtx_insn * - arm_md_asm_adjust (vec &outputs, vec & /*inputs*/, - vec & /*input_modes*/, -- vec &constraints, vec & /*clobbers*/, -+ vec &constraints, -+ vec & /*uses*/, vec & /*clobbers*/, - HARD_REG_SET & /*clobbered_regs*/, location_t loc) - { - bool saw_asm_flag = false; -diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc -index b700c23b8..c72e9c0b0 100644 ---- a/gcc/config/arm/arm.cc -+++ b/gcc/config/arm/arm.cc -@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT); - static rtx_insn *thumb1_md_asm_adjust (vec &, vec &, - vec &, - vec &, vec &, -- HARD_REG_SET &, location_t); -+ vec &, HARD_REG_SET &, location_t); - static const char *arm_identify_fpu_from_isa (sbitmap); - - /* Table of machine attributes. 
*/ -@@ -34209,7 +34209,8 @@ arm_stack_protect_guard (void) - rtx_insn * - thumb1_md_asm_adjust (vec &outputs, vec & /*inputs*/, - vec & /*input_modes*/, -- vec &constraints, vec & /*clobbers*/, -+ vec &constraints, -+ vec &, vec & /*clobbers*/, - HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) - { - for (unsigned i = 0, n = outputs.length (); i < n; ++i) -diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc -index 4ed390e4c..1b5a95410 100644 ---- a/gcc/config/avr/avr.cc -+++ b/gcc/config/avr/avr.cc -@@ -14497,6 +14497,7 @@ static rtx_insn * - avr_md_asm_adjust (vec &/*outputs*/, vec &/*inputs*/, - vec & /*input_modes*/, - vec &/*constraints*/, -+ vec &/*uses*/, - vec &clobbers, HARD_REG_SET &clobbered_regs, - location_t /*loc*/) - { -diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc -index f0017d630..3a1c85481 100644 ---- a/gcc/config/cris/cris.cc -+++ b/gcc/config/cris/cris.cc -@@ -151,7 +151,8 @@ static void cris_function_arg_advance (cumulative_args_t, - const function_arg_info &); - static rtx_insn *cris_md_asm_adjust (vec &, vec &, - vec &, vec &, -- vec &, HARD_REG_SET &, location_t); -+ vec &, vec &, -+ HARD_REG_SET &, location_t); - - static void cris_option_override (void); - -@@ -3506,7 +3507,8 @@ cris_function_arg_advance (cumulative_args_t ca_v, - static rtx_insn * - cris_md_asm_adjust (vec &outputs, vec &inputs, - vec & /*input_modes*/, -- vec &constraints, vec &clobbers, -+ vec &constraints, -+ vec &/*uses*/, vec &clobbers, - HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - /* For the time being, all asms clobber condition codes. 
-diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 593185fa6..83a0d8abb 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -22252,8 +22252,9 @@ ix86_c_mode_for_suffix (char suffix) - static rtx_insn * - ix86_md_asm_adjust (vec &outputs, vec & /*inputs*/, - vec & /*input_modes*/, -- vec &constraints, vec &clobbers, -- HARD_REG_SET &clobbered_regs, location_t loc) -+ vec &constraints, vec &/*uses*/, -+ vec &clobbers, HARD_REG_SET &clobbered_regs, -+ location_t loc) - { - bool saw_asm_flag = false; - -diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc -index 2a58dd925..2ca2c769c 100644 ---- a/gcc/config/mn10300/mn10300.cc -+++ b/gcc/config/mn10300/mn10300.cc -@@ -2849,7 +2849,8 @@ mn10300_conditional_register_usage (void) - static rtx_insn * - mn10300_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, - vec & /*input_modes*/, -- vec & /*constraints*/, vec &clobbers, -+ vec & /*constraints*/, -+ vec &/*uses*/, vec &clobbers, - HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG)); -diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc -index 71fe9e8bc..27530495f 100644 ---- a/gcc/config/nds32/nds32.cc -+++ b/gcc/config/nds32/nds32.cc -@@ -4199,8 +4199,8 @@ nds32_md_asm_adjust (vec &outputs ATTRIBUTE_UNUSED, - vec &inputs ATTRIBUTE_UNUSED, - vec &input_modes ATTRIBUTE_UNUSED, - vec &constraints ATTRIBUTE_UNUSED, -- vec &clobbers, HARD_REG_SET &clobbered_regs, -- location_t /*loc*/) -+ vec &/*uses*/, vec &clobbers, -+ HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - if (!flag_inline_asm_r15) - { -diff --git a/gcc/config/pdp11/pdp11.cc b/gcc/config/pdp11/pdp11.cc -index 380223439..25cf62cbc 100644 ---- a/gcc/config/pdp11/pdp11.cc -+++ b/gcc/config/pdp11/pdp11.cc -@@ -155,7 +155,8 @@ static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool); - static int pdp11_insn_cost (rtx_insn *insn, bool speed); - static rtx_insn 
*pdp11_md_asm_adjust (vec &, vec &, - vec &, vec &, -- vec &, HARD_REG_SET &, location_t); -+ vec &, vec &, -+ HARD_REG_SET &, location_t); - static bool pdp11_return_in_memory (const_tree, const_tree); - static rtx pdp11_function_value (const_tree, const_tree, bool); - static rtx pdp11_libcall_value (machine_mode, const_rtx); -@@ -2137,7 +2138,8 @@ pdp11_cmp_length (rtx *operands, int words) - static rtx_insn * - pdp11_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, - vec & /*input_modes*/, -- vec & /*constraints*/, vec &clobbers, -+ vec & /*constraints*/, -+ vec &/*uses*/, vec &clobbers, - HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM)); -diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc -index 0b75861bb..55d4ce751 100644 ---- a/gcc/config/rs6000/rs6000.cc -+++ b/gcc/config/rs6000/rs6000.cc -@@ -3443,7 +3443,8 @@ rs6000_builtin_mask_calculate (void) - static rtx_insn * - rs6000_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, - vec & /*input_modes*/, -- vec & /*constraints*/, vec &clobbers, -+ vec & /*constraints*/, -+ vec &/*uses*/, vec &clobbers, - HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); -diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc -index ae0cf9ef5..f1599a5c5 100644 ---- a/gcc/config/s390/s390.cc -+++ b/gcc/config/s390/s390.cc -@@ -16994,7 +16994,8 @@ s390_hard_fp_reg_p (rtx x) - static rtx_insn * - s390_md_asm_adjust (vec &outputs, vec &inputs, - vec &input_modes, -- vec &constraints, vec & /*clobbers*/, -+ vec &constraints, -+ vec &/*uses*/, vec &/*clobbers*/, - HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/) - { - if (!TARGET_VXE) -diff --git a/gcc/config/vax/vax.cc b/gcc/config/vax/vax.cc -index 28c1af59a..7673a1428 100644 ---- a/gcc/config/vax/vax.cc -+++ b/gcc/config/vax/vax.cc -@@ -57,7 +57,8 @@ static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool); - static 
machine_mode vax_cc_modes_compatible (machine_mode, machine_mode); - static rtx_insn *vax_md_asm_adjust (vec &, vec &, - vec &, vec &, -- vec &, HARD_REG_SET &, location_t); -+ vec &, vec &, HARD_REG_SET &, -+ location_t); - static rtx vax_function_arg (cumulative_args_t, const function_arg_info &); - static void vax_function_arg_advance (cumulative_args_t, - const function_arg_info &); -@@ -1179,6 +1180,7 @@ vax_md_asm_adjust (vec &outputs ATTRIBUTE_UNUSED, - vec &inputs ATTRIBUTE_UNUSED, - vec &input_modes ATTRIBUTE_UNUSED, - vec &constraints ATTRIBUTE_UNUSED, -+ vec &/*uses*/, - vec &clobbers, HARD_REG_SET &clobbered_regs, - location_t /*loc*/) - { -diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc -index 03c1a33e1..35b46ced9 100644 ---- a/gcc/config/visium/visium.cc -+++ b/gcc/config/visium/visium.cc -@@ -190,7 +190,7 @@ static tree visium_build_builtin_va_list (void); - static rtx_insn *visium_md_asm_adjust (vec &, vec &, - vec &, - vec &, vec &, -- HARD_REG_SET &, location_t); -+ vec &, HARD_REG_SET &, location_t); - - static bool visium_legitimate_constant_p (machine_mode, rtx); - -@@ -794,7 +794,8 @@ visium_conditional_register_usage (void) - static rtx_insn * - visium_md_asm_adjust (vec & /*outputs*/, vec & /*inputs*/, - vec & /*input_modes*/, -- vec & /*constraints*/, vec &clobbers, -+ vec & /*constraints*/, -+ vec &/*uses*/, vec &clobbers, - HARD_REG_SET &clobbered_regs, location_t /*loc*/) - { - clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM)); -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 357c29a4d..4f93facf7 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -11626,10 +11626,11 @@ from shared libraries (DLLs). - You need not define this macro if it would always evaluate to zero. 
- @end defmac - --@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec& @var{outputs}, vec& @var{inputs}, vec& @var{input_modes}, vec& @var{constraints}, vec& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc}) -+@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec& @var{outputs}, vec& @var{inputs}, vec& @var{input_modes}, vec& @var{constraints}, vec& @var{usess}, vec& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc}) - This target hook may add @dfn{clobbers} to @var{clobbers} and - @var{clobbered_regs} for any hard regs the port wishes to automatically --clobber for an asm. The @var{outputs} and @var{inputs} may be inspected -+clobber for an asm. It can also add hard registers that are used by the -+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected - to avoid clobbering a register that is already used by the asm. @var{loc} - is the source location of the asm. - -diff --git a/gcc/recog.cc b/gcc/recog.cc -index cd2410ab2..5b81d5e21 100644 ---- a/gcc/recog.cc -+++ b/gcc/recog.cc -@@ -1977,13 +1977,17 @@ asm_noperands (const_rtx body) - { - /* Multiple output operands, or 1 output plus some clobbers: - body is -- [(set OUTPUT (asm_operands ...))... (clobber (reg ...))...]. */ -- /* Count backwards through CLOBBERs to determine number of SETs. */ -+ [(set OUTPUT (asm_operands ...))... -+ (use (reg ...))... -+ (clobber (reg ...))...]. */ -+ /* Count backwards through USEs and CLOBBERs to determine -+ number of SETs. */ - for (i = XVECLEN (body, 0); i > 0; i--) - { - if (GET_CODE (XVECEXP (body, 0, i - 1)) == SET) - break; -- if (GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER) -+ if (GET_CODE (XVECEXP (body, 0, i - 1)) != USE -+ && GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER) - return -1; - } - -@@ -2010,10 +2014,13 @@ asm_noperands (const_rtx body) - else - { - /* 0 outputs, but some clobbers: -- body is [(asm_operands ...) (clobber (reg ...))...]. 
*/ -+ body is [(asm_operands ...) -+ (use (reg ...))... -+ (clobber (reg ...))...]. */ - /* Make sure all the other parallel things really are clobbers. */ - for (i = XVECLEN (body, 0) - 1; i > 0; i--) -- if (GET_CODE (XVECEXP (body, 0, i)) != CLOBBER) -+ if (GET_CODE (XVECEXP (body, 0, i)) != USE -+ && GET_CODE (XVECEXP (body, 0, i)) != CLOBBER) - return -1; - } - } -@@ -2080,7 +2087,8 @@ decode_asm_operands (rtx body, rtx *operands, rtx **operand_locs, - the SETs. Their constraints are in the ASM_OPERANDS itself. */ - for (i = 0; i < nparallel; i++) - { -- if (GET_CODE (XVECEXP (body, 0, i)) == CLOBBER) -+ if (GET_CODE (XVECEXP (body, 0, i)) == USE -+ || GET_CODE (XVECEXP (body, 0, i)) == CLOBBER) - break; /* Past last SET */ - gcc_assert (GET_CODE (XVECEXP (body, 0, i)) == SET); - if (operands) -diff --git a/gcc/target.def b/gcc/target.def -index a57e51b0d..60096c60c 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -4309,7 +4309,8 @@ DEFHOOK - (md_asm_adjust, - "This target hook may add @dfn{clobbers} to @var{clobbers} and\n\ - @var{clobbered_regs} for any hard regs the port wishes to automatically\n\ --clobber for an asm. The @var{outputs} and @var{inputs} may be inspected\n\ -+clobber for an asm. It can also add hard registers that are used by the\n\ -+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected\n\ - to avoid clobbering a register that is already used by the asm. 
@var{loc}\n\ - is the source location of the asm.\n\ - \n\ -@@ -4320,7 +4321,7 @@ changes to @var{inputs} must be accompanied by the corresponding changes\n\ - to @var{input_modes}.", - rtx_insn *, - (vec& outputs, vec& inputs, vec& input_modes, -- vec& constraints, vec& clobbers, -+ vec& constraints, vec& usess, vec& clobbers, - HARD_REG_SET& clobbered_regs, location_t loc), - NULL) - --- -2.33.0 - diff --git a/0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch b/0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch deleted file mode 100644 index edf0b1e..0000000 --- a/0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch +++ /dev/null @@ -1,998 +0,0 @@ -From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Mon, 19 Jun 2023 15:47:46 +0100 -Subject: [PATCH 053/157] [Backport][SME] New compact syntax for insn and - insn_split in Machine Descriptions. - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217 - -This patch adds support for a compact syntax for specifying constraints in -instruction patterns. Credit for the idea goes to Richard Earnshaw. - -With this new syntax we want a clean break from the current limitations to make -something that is hopefully easier to use and maintain. - -The idea behind this compact syntax is that often times it's quite hard to -correlate the entries in the constrains list, attributes and instruction lists. - -One has to count and this often is tedious. Additionally when changing a single -line in the insn multiple lines in a diff change, making it harder to see what's -going on. - -This new syntax takes into account many of the common things that are done in MD -files. It's also worth saying that this version is intended to deal with the -common case of a string based alternatives. For C chunks we have some ideas -but those are not intended to be addressed here. 
- -It's easiest to explain with an example: - -normal syntax: - -(define_insn_and_split "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") - (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))] - "(register_operand (operands[0], SImode) - || aarch64_reg_or_zero (operands[1], SImode))" - "@ - mov\\t%w0, %w1 - mov\\t%w0, %w1 - mov\\t%w0, %w1 - mov\\t%w0, %1 - # - * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); - ldr\\t%w0, %1 - ldr\\t%s0, %1 - str\\t%w1, %0 - str\\t%s1, %0 - adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1] - adr\\t%x0, %c1 - adrp\\t%x0, %A1 - fmov\\t%s0, %w1 - fmov\\t%w0, %s1 - fmov\\t%s0, %s1 - * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" - "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) - && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" - [(const_int 0)] - "{ - aarch64_expand_mov_immediate (operands[0], operands[1]); - DONE; - }" - ;; The "mov_imm" type for CNT is just a placeholder. - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, - load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") - (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") -] -) - -New syntax: - -(define_insn_and_split "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand") - (match_operand:SI 1 "aarch64_mov_operand"))] - "(register_operand (operands[0], SImode) - || aarch64_reg_or_zero (operands[1], SImode))" - {@ [cons: =0, 1; attrs: type, arch, length] - [r , r ; mov_reg , * , 4] mov\t%w0, %w1 - [k , r ; mov_reg , * , 4] ^ - [r , k ; mov_reg , * , 4] ^ - [r , M ; mov_imm , * , 4] mov\t%w0, %1 - [r , n ; mov_imm , * ,16] # - /* The "mov_imm" type for CNT is just a placeholder. 
*/ - [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); - [r , m ; load_4 , * , 4] ldr\t%w0, %1 - [w , m ; load_4 , fp , 4] ldr\t%s0, %1 - [m , rZ ; store_4 , * , 4] str\t%w1, %0 - [m , w ; store_4 , fp , 4] str\t%s1, %0 - [r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1] - [r , Usa; adr , * , 4] adr\t%x0, %c1 - [r , Ush; adr , * , 4] adrp\t%x0, %A1 - [w , rZ ; f_mcr , fp , 4] fmov\t%s0, %w1 - [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 - [w , w ; fmov , fp , 4] fmov\t%s0, %s1 - [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); - } - "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) - && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" - [(const_int 0)] - { - aarch64_expand_mov_immediate (operands[0], operands[1]); - DONE; - } -) - -The main syntax rules are as follows (See docs for full rules): - - Template must start with "{@" and end with "}" to use the new syntax. - - "{@" is followed by a layout in parentheses which is "cons:" followed by - a list of match_operand/match_scratch IDs, then a semicolon, then the - same for attributes ("attrs:"). Both sections are optional (so you can - use only cons, or only attrs, or both), and cons must come before attrs - if present. - - Each alternative begins with any amount of whitespace. - - Following the whitespace is a comma-separated list of constraints and/or - attributes within brackets [], with sections separated by a semicolon. - - Following the closing ']' is any amount of whitespace, and then the actual - asm output. - - Spaces are allowed in the list (they will simply be removed). - - All alternatives should be specified: a blank list should be - "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if - you leave certain attributes empty, I have found). 
- - The actual constraint string in the match_operand or match_scratch, and - the attribute string in the set_attr, must be blank or an empty string - (you can't combine the old and new syntaxes). - - The common idion * return can be shortened by using <<. - - Any unexpanded iterators left during processing will result in an error at - compile time. If for some reason <> is needed in the output then these - must be escaped using \. - - Within an {@ block both multiline and singleline C comments are allowed, but - when used outside of a C block they must be the only non-whitespace blocks on - the line - - Inside an {@ block any unexpanded iterators will result in a compile time - fault instead of incorrect assembly being generated at runtime. If the - literal <> is needed in the output this needs to be escaped with \<\>. - - This check is not performed inside C blocks (lines starting with *). - - Instead of copying the previous instruction again in the next pattern, one - can use ^ to refer to the previous asm string. - -This patch works by blindly transforming the new syntax into the old syntax, -so it doesn't do extensive checking. However, it does verify that: - - The correct number of constraints/attributes are specified. - - You haven't mixed old and new syntax. - - The specified operand IDs/attribute names actually exist. - - You don't have duplicate cons - -If something goes wrong, it may write invalid constraints/attributes/template -back into the rtx. But this shouldn't matter because error_at will cause the -program to fail on exit anyway. - -Because this transformation occurs as early as possible (before patterns are -queued), the rest of the compiler can completely ignore the new syntax and -assume that the old syntax will always be used. - -This doesn't seem to have any measurable effect on the runtime of gen* -programs. 
- -gcc/ChangeLog: - - * gensupport.cc (class conlist, add_constraints, add_attributes, - skip_spaces, expect_char, preprocess_compact_syntax, - parse_section_layout, parse_section, convert_syntax): New. - (process_rtx): Check for conversion. - * genoutput.cc (process_template): Check for unresolved iterators. - (class data): Add compact_syntax_p. - (gen_insn): Use it. - * gensupport.h (compact_syntax): New. - (hash-set.h): Include. - * doc/md.texi: Document it. - -Co-Authored-By: Omar Tahir ---- - gcc/doc/md.texi | 163 +++++++++++++++ - gcc/genoutput.cc | 48 ++++- - gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++ - gcc/gensupport.h | 3 + - 4 files changed, 709 insertions(+), 3 deletions(-) - -diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi -index 3b544358b..04ace8f7f 100644 ---- a/gcc/doc/md.texi -+++ b/gcc/doc/md.texi -@@ -27,6 +27,7 @@ See the next chapter for information on the C header file. - from such an insn. - * Output Statement:: For more generality, write C code to output - the assembler code. -+* Compact Syntax:: Compact syntax for writing machine descriptors. - * Predicates:: Controlling what kinds of operands can be used - for an insn. - * Constraints:: Fine-tuning operand selection. -@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template: - @end group - @end smallexample - -+@node Compact Syntax -+@section Compact Syntax -+@cindex compact syntax -+ -+When a @code{define_insn} or @code{define_insn_and_split} has multiple -+alternatives it may be beneficial to use the compact syntax when specifying -+alternatives. -+ -+This syntax puts the constraints and attributes on the same horizontal line as -+the instruction assembly template. 
-+ -+As an example -+ -+@smallexample -+@group -+(define_insn_and_split "" -+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r") -+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv"))] -+ "" -+ "@@ -+ mov\\t%w0, %w1 -+ mov\\t%w0, %w1 -+ mov\\t%w0, %w1 -+ mov\\t%w0, %1 -+ # -+ * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);" -+ "&& true" -+ [(const_int 0)] -+ @{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ @} -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm") -+ (set_attr "arch" "*,*,*,*,*,sve") -+ (set_attr "length" "4,4,4,4,*, 4") -+] -+) -+@end group -+@end smallexample -+ -+can be better expressed as: -+ -+@smallexample -+@group -+(define_insn_and_split "" -+ [(set (match_operand:SI 0 "nonimmediate_operand") -+ (match_operand:SI 1 "aarch64_mov_operand"))] -+ "" -+ @{@@ [cons: =0, 1; attrs: type, arch, length] -+ [r , r ; mov_reg , * , 4] mov\t%w0, %w1 -+ [k , r ; mov_reg , * , 4] ^ -+ [r , k ; mov_reg , * , 4] ^ -+ [r , M ; mov_imm , * , 4] mov\t%w0, %1 -+ [r , n ; mov_imm , * , *] # -+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ @} -+ "&& true" -+ [(const_int 0)] -+ @{ -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ @} -+) -+@end group -+@end smallexample -+ -+The syntax rules are as follows: -+@itemize @bullet -+@item -+Templates must start with @samp{@{@@} to use the new syntax. -+ -+@item -+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:} -+followed by a comma-separated list of @code{match_operand}/@code{match_scratch} -+operand numbers, then a semicolon, followed by the same for attributes -+(@samp{attrs:}). Operand modifiers like @code{=} and @code{+} can be placed -+before an operand number. -+Both sections are optional (so you can use only @samp{cons}, or only -+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if -+present. 
-+ -+@item -+Each alternative begins with any amount of whitespace. -+ -+@item -+Following the whitespace is a comma-separated list of "constraints" and/or -+"attributes" within brackets @code{[]}, with sections separated by a semicolon. -+ -+@item -+Should you want to copy the previous asm line, the symbol @code{^} can be used. -+This allows less copy pasting between alternative and reduces the number of -+lines to update on changes. -+ -+@item -+When using C functions for output, the idiom @samp{* return @var{function};} -+can be replaced with the shorthand @samp{<< @var{function};}. -+ -+@item -+Following the closing @samp{]} is any amount of whitespace, and then the actual -+asm output. -+ -+@item -+Spaces are allowed in the list (they will simply be removed). -+ -+@item -+All constraint alternatives should be specified. For example, a list of -+of three blank alternatives should be written @samp{[,,]} rather than -+@samp{[]}. -+ -+@item -+All attribute alternatives should be non-empty, with @samp{*} -+representing the default attribute value. For example, a list of three -+default attribute values should be written @samp{[*,*,*]} rather than -+@samp{[]}. -+ -+@item -+Within an @samp{@{@@} block both multiline and singleline C comments are -+allowed, but when used outside of a C block they must be the only non-whitespace -+blocks on the line. -+ -+@item -+Within an @samp{@{@@} block, any iterators that do not get expanded will result -+in an error. If for some reason it is required to have @code{<} or @code{>} in -+the output then these must be escaped using @backslashchar{}. -+ -+@item -+It is possible to use the @samp{attrs} list to specify some attributes and to -+use the normal @code{set_attr} syntax to specify other attributes. There must -+not be any overlap between the two lists. 
-+ -+In other words, the following is valid: -+@smallexample -+@group -+(define_insn_and_split "" -+ [(set (match_operand:SI 0 "nonimmediate_operand") -+ (match_operand:SI 1 "aarch64_mov_operand"))] -+ "" -+ @{@@ [cons: 0, 1; attrs: type, arch, length]@} -+ @dots{} -+ [(set_attr "foo" "mov_imm")] -+) -+@end group -+@end smallexample -+ -+but this is not valid: -+@smallexample -+@group -+(define_insn_and_split "" -+ [(set (match_operand:SI 0 "nonimmediate_operand") -+ (match_operand:SI 1 "aarch64_mov_operand"))] -+ "" -+ @{@@ [cons: 0, 1; attrs: type, arch, length]@} -+ @dots{} -+ [(set_attr "arch" "bar") -+ (set_attr "foo" "mov_imm")] -+) -+@end group -+@end smallexample -+ -+because it specifies @code{arch} twice. -+@end itemize -+ - @node Predicates - @section Predicates - @cindex predicates -diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc -index 6bb03e286..de5dafdbf 100644 ---- a/gcc/genoutput.cc -+++ b/gcc/genoutput.cc -@@ -157,6 +157,7 @@ public: - int n_alternatives; /* Number of alternatives in each constraint */ - int operand_number; /* Operand index in the big array. */ - int output_format; /* INSN_OUTPUT_FORMAT_*. */ -+ bool compact_syntax_p; - struct operand_data operand[MAX_MAX_OPERANDS]; - }; - -@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code) - if (sp != ep) - message_at (d->loc, "trailing whitespace in output template"); - -- while (cp < sp) -+ /* Check for any unexpanded iterators. 
*/ -+ if (bp[0] != '*' && d->compact_syntax_p) - { -- putchar (*cp); -- cp++; -+ const char *p = cp; -+ const char *last_bracket = nullptr; -+ while (p < sp) -+ { -+ if (*p == '\\' && p + 1 < sp) -+ { -+ putchar (*p); -+ putchar (*(p+1)); -+ p += 2; -+ continue; -+ } -+ -+ if (*p == '>' && last_bracket && *last_bracket == '<') -+ { -+ int len = p - last_bracket; -+ fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'", -+ len - 1, last_bracket + 1, cp); -+ } -+ else if (*p == '<' || *p == '>') -+ last_bracket = p; -+ -+ putchar (*p); -+ p += 1; -+ } -+ -+ if (last_bracket) -+ { -+ char *nl = strchr (const_cast (cp), '\n'); -+ if (nl) -+ *nl = '\0'; -+ fatal_at (d->loc, "unmatched angle brackets, likely an " -+ "error in iterator syntax in %s", cp); -+ } -+ } -+ else -+ { -+ while (cp < sp) -+ putchar (*(cp++)); - } - -+ cp = sp; -+ - if (!found_star) - puts ("\","); - else if (*bp != '*') -@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info) - else - d->name = 0; - -+ d->compact_syntax_p = compact_syntax.contains (insn); -+ - /* Build up the list in the same order as the insns are seen - in the machine description. */ - d->next = 0; -diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc -index 42680499d..23c61dcdd 100644 ---- a/gcc/gensupport.cc -+++ b/gcc/gensupport.cc -@@ -18,6 +18,8 @@ - . */ - - #include "bconfig.h" -+#define INCLUDE_STRING -+#define INCLUDE_VECTOR - #include "system.h" - #include "coretypes.h" - #include "tm.h" -@@ -33,6 +35,8 @@ - static rtx operand_data[MAX_OPERANDS]; - static rtx match_operand_entries_in_pattern[MAX_OPERANDS]; - static char used_operands_numbers[MAX_OPERANDS]; -+/* List of entries which are part of the new syntax. */ -+hash_set compact_syntax; - - - /* In case some macros used by files we include need it, define this here. */ -@@ -545,6 +549,497 @@ gen_rewrite_sequence (rtvec vec) - return new_vec; - } - -+/* The following is for handling the compact syntax for constraints and -+ attributes. 
-+ -+ The normal syntax looks like this: -+ -+ ... -+ (match_operand: 0 "s_register_operand" "r,I,k") -+ (match_operand: 2 "s_register_operand" "r,k,I") -+ ... -+ "@ -+ -+ -+ " -+ ... -+ (set_attr "length" "4,8,8") -+ -+ The compact syntax looks like this: -+ -+ ... -+ (match_operand: 0 "s_register_operand") -+ (match_operand: 2 "s_register_operand") -+ ... -+ {@ [cons: 0, 2; attrs: length] -+ [r,r; 4] -+ [I,k; 8] -+ [k,I; 8] -+ } -+ ... -+ [] -+ -+ This is the only place where this syntax needs to be handled. Relevant -+ patterns are transformed from compact to the normal syntax before they are -+ queued, so none of the gen* programs need to know about this syntax at all. -+ -+ Conversion process (convert_syntax): -+ -+ 0) Check that pattern actually uses new syntax (check for {@ ... }). -+ -+ 1) Get the "layout", i.e. the "[cons: 0 2; attrs: length]" from the above -+ example. cons must come first; both are optional. Set up two vecs, -+ convec and attrvec, for holding the results of the transformation. -+ -+ 2) For each alternative: parse the list of constraints and/or attributes, -+ and enqueue them in the relevant lists in convec and attrvec. By the end -+ of this process, convec[N].con and attrvec[N].con should contain regular -+ syntax constraint/attribute lists like "r,I,k". Copy the asm to a string -+ as we go. -+ -+ 3) Search the rtx and write the constraint and attribute lists into the -+ correct places. Write the asm back into the template. */ -+ -+/* Helper class for shuffling constraints/attributes in convert_syntax and -+ add_constraints/add_attributes. This includes commas but not whitespace. */ -+ -+class conlist { -+private: -+ std::string con; -+ -+public: -+ std::string name; -+ int idx = -1; -+ -+ conlist () = default; -+ -+ /* [ns..ns + len) should be a string with the id of the rtx to match -+ i.e. 
if rtx is the relevant match_operand or match_scratch then -+ [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then -+ [ns..ns + len) should equal XSTR (rtx, 0). */ -+ conlist (const char *ns, unsigned int len, bool numeric) -+ { -+ /* Trim leading whitespaces. */ -+ while (ISBLANK (*ns)) -+ { -+ ns++; -+ len--; -+ } -+ -+ /* Trim trailing whitespace. */ -+ for (int i = len - 1; i >= 0; i--, len--) -+ if (!ISBLANK (ns[i])) -+ break; -+ -+ /* Parse off any modifiers. */ -+ while (!ISALNUM (*ns)) -+ { -+ con += *(ns++); -+ len--; -+ } -+ -+ name.assign (ns, len); -+ if (numeric) -+ idx = std::stoi (name); -+ } -+ -+ /* Adds a character to the end of the string. */ -+ void add (char c) -+ { -+ con += c; -+ } -+ -+ /* Output the string in the form of a brand-new char *, then effectively -+ clear the internal string by resetting len to 0. */ -+ char *out () -+ { -+ /* Final character is always a trailing comma, so strip it out. */ -+ char *q = xstrndup (con.c_str (), con.size () - 1); -+ con.clear (); -+ return q; -+ } -+}; -+ -+typedef std::vector vec_conlist; -+ -+/* Add constraints to an rtx. This function is similar to remove_constraints. -+ Errors if adding the constraints would overwrite existing constraints. */ -+ -+static void -+add_constraints (rtx part, file_location loc, vec_conlist &cons) -+{ -+ const char *format_ptr; -+ -+ if (part == NULL_RTX) -+ return; -+ -+ /* If match_op or match_scr, check if we have the right one, and if so, copy -+ over the constraint list. */ -+ if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH) -+ { -+ int field = GET_CODE (part) == MATCH_OPERAND ? 
2 : 1; -+ unsigned id = XINT (part, 0); -+ -+ if (id >= cons.size () || cons[id].idx == -1) -+ return; -+ -+ if (XSTR (part, field)[0] != '\0') -+ { -+ error_at (loc, "can't mix normal and compact constraint syntax"); -+ return; -+ } -+ XSTR (part, field) = cons[id].out (); -+ cons[id].idx = -1; -+ } -+ -+ format_ptr = GET_RTX_FORMAT (GET_CODE (part)); -+ -+ /* Recursively search the rtx. */ -+ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++) -+ switch (*format_ptr++) -+ { -+ case 'e': -+ case 'u': -+ add_constraints (XEXP (part, i), loc, cons); -+ break; -+ case 'E': -+ if (XVEC (part, i) != NULL) -+ for (int j = 0; j < XVECLEN (part, i); j++) -+ add_constraints (XVECEXP (part, i, j), loc, cons); -+ break; -+ default: -+ continue; -+ } -+} -+ -+/* Add ATTRS to definition X's attribute list. */ -+ -+static void -+add_attributes (rtx x, vec_conlist &attrs) -+{ -+ unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3; -+ rtvec orig = XVEC (x, attr_index); -+ if (orig) -+ { -+ size_t n_curr = XVECLEN (x, attr_index); -+ rtvec copy = rtvec_alloc (n_curr + attrs.size ()); -+ -+ /* Create a shallow copy of existing entries. */ -+ memcpy (©->elem[attrs.size ()], &orig->elem[0], -+ sizeof (rtx) * n_curr); -+ XVEC (x, attr_index) = copy; -+ } -+ else -+ XVEC (x, attr_index) = rtvec_alloc (attrs.size ()); -+ -+ /* Create the new elements. */ -+ for (unsigned i = 0; i < attrs.size (); i++) -+ { -+ rtx attr = rtx_alloc (SET_ATTR); -+ XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ()); -+ XSTR (attr, 1) = attrs[i].out (); -+ XVECEXP (x, attr_index, i) = attr; -+ } -+} -+ -+/* Consumes spaces and tabs. */ -+ -+static inline void -+skip_spaces (const char **str) -+{ -+ while (ISBLANK (**str)) -+ (*str)++; -+} -+ -+/* Consumes the given character, if it's there. 
*/ -+ -+static inline bool -+expect_char (const char **str, char c) -+{ -+ if (**str != c) -+ return false; -+ (*str)++; -+ return true; -+} -+ -+/* Parses the section layout that follows a "{@" if using new syntax. Builds -+ a vector for a single section. E.g. if we have "attrs: length, arch]..." -+ then list will have two elements, the first for "length" and the second -+ for "arch". */ -+ -+static void -+parse_section_layout (file_location loc, const char **templ, const char *label, -+ vec_conlist &list, bool numeric) -+{ -+ const char *name_start; -+ size_t label_len = strlen (label); -+ if (strncmp (label, *templ, label_len) == 0) -+ { -+ *templ += label_len; -+ -+ /* Gather the names. */ -+ while (**templ != ';' && **templ != ']') -+ { -+ skip_spaces (templ); -+ name_start = *templ; -+ int len = 0; -+ char val = (*templ)[len]; -+ while (val != ',' && val != ';' && val != ']') -+ { -+ if (val == 0 || val == '\n') -+ fatal_at (loc, "missing ']'"); -+ val = (*templ)[++len]; -+ } -+ *templ += len; -+ if (val == ',') -+ (*templ)++; -+ list.push_back (conlist (name_start, len, numeric)); -+ } -+ } -+} -+ -+/* Parse a section, a section is defined as a named space separated list, e.g. -+ -+ foo: a, b, c -+ -+ is a section named "foo" with entries a, b and c. */ -+ -+static void -+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no, -+ vec_conlist &list, file_location loc, const char *name) -+{ -+ unsigned int i; -+ -+ /* Go through the list, one character at a time, adding said character -+ to the correct string. 
*/ -+ for (i = 0; **templ != ']' && **templ != ';'; (*templ)++) -+ if (!ISBLANK (**templ)) -+ { -+ if (**templ == 0 || **templ == '\n') -+ fatal_at (loc, "missing ']'"); -+ list[i].add (**templ); -+ if (**templ == ',') -+ { -+ ++i; -+ if (i == n_elems) -+ fatal_at (loc, "too many %ss in alternative %d: expected %d", -+ name, alt_no, n_elems); -+ } -+ } -+ -+ if (i + 1 < n_elems) -+ fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d", -+ name, alt_no, n_elems, i); -+ -+ list[i].add (','); -+} -+ -+/* The compact syntax has more convience syntaxes. As such we post process -+ the lines to get them back to something the normal syntax understands. */ -+ -+static void -+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line, -+ std::string &last_line) -+{ -+ /* Check if we're copying the last statement. */ -+ if (line.find ("^") == 0 && line.size () == 1) -+ { -+ if (last_line.empty ()) -+ fatal_at (loc, "found instruction to copy previous line (^) in" -+ "alternative %d but no previous line to copy", alt_no); -+ line = last_line; -+ return; -+ } -+ -+ std::string result; -+ std::string buffer; -+ /* Check if we have << which means return c statement. */ -+ if (line.find ("<<") == 0) -+ { -+ result.append ("* return "); -+ const char *chunk = line.c_str () + 2; -+ skip_spaces (&chunk); -+ result.append (chunk); -+ } -+ else -+ result.append (line); -+ -+ line = result; -+ return; -+} -+ -+/* Converts an rtx from compact syntax to normal syntax if possible. */ -+ -+static void -+convert_syntax (rtx x, file_location loc) -+{ -+ int alt_no; -+ unsigned int templ_index; -+ const char *templ; -+ vec_conlist tconvec, convec, attrvec; -+ -+ templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2; -+ -+ templ = XTMPL (x, templ_index); -+ -+ /* Templates with constraints start with "{@". */ -+ if (strncmp ("*{@", templ, 3)) -+ return; -+ -+ /* Get the layout for the template. 
*/ -+ templ += 3; -+ skip_spaces (&templ); -+ -+ if (!expect_char (&templ, '[')) -+ fatal_at (loc, "expecing `[' to begin section list"); -+ -+ parse_section_layout (loc, &templ, "cons:", tconvec, true); -+ -+ /* Check for any duplicate cons entries and sort based on i. */ -+ for (auto e : tconvec) -+ { -+ unsigned idx = e.idx; -+ if (idx >= convec.size ()) -+ convec.resize (idx + 1); -+ -+ if (convec[idx].idx >= 0) -+ fatal_at (loc, "duplicate cons number found: %d", idx); -+ convec[idx] = e; -+ } -+ tconvec.clear (); -+ -+ if (*templ != ']') -+ { -+ if (*templ == ';') -+ skip_spaces (&(++templ)); -+ parse_section_layout (loc, &templ, "attrs:", attrvec, false); -+ } -+ -+ if (!expect_char (&templ, ']')) -+ fatal_at (loc, "expecting `]` to end section list - section list must have " -+ "cons first, attrs second"); -+ -+ /* We will write the un-constrainified template into new_templ. */ -+ std::string new_templ; -+ new_templ.append ("@"); -+ -+ /* Skip to the first proper line. */ -+ skip_spaces (&templ); -+ if (*templ == 0) -+ fatal_at (loc, "'{@...}' blocks must have at least one alternative"); -+ if (*templ != '\n') -+ fatal_at (loc, "unexpected character '%c' after ']'", *templ); -+ templ++; -+ -+ alt_no = 0; -+ std::string last_line; -+ -+ /* Process the alternatives. */ -+ while (*(templ - 1) != '\0') -+ { -+ /* Skip leading whitespace. */ -+ std::string buffer; -+ skip_spaces (&templ); -+ -+ /* Check if we're at the end. */ -+ if (templ[0] == '}' && templ[1] == '\0') -+ break; -+ -+ if (expect_char (&templ, '[')) -+ { -+ new_templ += '\n'; -+ new_templ.append (buffer); -+ /* Parse the constraint list, then the attribute list. 
*/ -+ if (convec.size () > 0) -+ parse_section (&templ, convec.size (), alt_no, convec, loc, -+ "constraint"); -+ -+ if (attrvec.size () > 0) -+ { -+ if (convec.size () > 0 && !expect_char (&templ, ';')) -+ fatal_at (loc, "expected `;' to separate constraints " -+ "and attributes in alternative %d", alt_no); -+ -+ parse_section (&templ, attrvec.size (), alt_no, -+ attrvec, loc, "attribute"); -+ } -+ -+ if (!expect_char (&templ, ']')) -+ fatal_at (loc, "expected end of constraint/attribute list but " -+ "missing an ending `]' in alternative %d", alt_no); -+ } -+ else if (templ[0] == '/' && templ[1] == '/') -+ { -+ templ += 2; -+ /* Glob till newline or end of string. */ -+ while (*templ != '\n' || *templ != '\0') -+ templ++; -+ -+ /* Skip any newlines or whitespaces needed. */ -+ while (ISSPACE(*templ)) -+ templ++; -+ continue; -+ } -+ else if (templ[0] == '/' && templ[1] == '*') -+ { -+ templ += 2; -+ /* Glob till newline or end of multiline comment. */ -+ while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/') -+ templ++; -+ -+ while (templ[0] != '*' || templ[1] != '/') -+ { -+ if (templ[0] == 0) -+ fatal_at (loc, "unterminated '/*'"); -+ templ++; -+ } -+ templ += 2; -+ -+ /* Skip any newlines or whitespaces needed. */ -+ while (ISSPACE(*templ)) -+ templ++; -+ continue; -+ } -+ else -+ fatal_at (loc, "expected constraint/attribute list at beginning of " -+ "alternative %d but missing a starting `['", alt_no); -+ -+ /* Skip whitespace between list and asm. */ -+ skip_spaces (&templ); -+ -+ /* Copy asm to new template. */ -+ std::string line; -+ while (*templ != '\n' && *templ != '\0') -+ line += *templ++; -+ -+ /* Apply any pre-processing needed to the line. */ -+ preprocess_compact_syntax (loc, alt_no, line, last_line); -+ new_templ.append (line); -+ last_line = line; -+ -+ /* Normal "*..." syntax expects the closing quote to be on the final -+ line of asm, whereas we allow the closing "}" to be on its own line. 
-+ Postpone copying the '\n' until we know that there is another -+ alternative in the list. */ -+ while (ISSPACE (*templ)) -+ templ++; -+ ++alt_no; -+ } -+ -+ /* Write the constraints and attributes into their proper places. */ -+ if (convec.size () > 0) -+ add_constraints (x, loc, convec); -+ -+ if (attrvec.size () > 0) -+ add_attributes (x, attrvec); -+ -+ /* Copy over the new un-constrainified template. */ -+ XTMPL (x, templ_index) = xstrdup (new_templ.c_str ()); -+ -+ /* Register for later checks during iterator expansions. */ -+ compact_syntax.add (x); -+} -+ - /* Process a top level rtx in some way, queuing as appropriate. */ - - static void -@@ -553,10 +1048,12 @@ process_rtx (rtx desc, file_location loc) - switch (GET_CODE (desc)) - { - case DEFINE_INSN: -+ convert_syntax (desc, loc); - queue_pattern (desc, &define_insn_tail, loc); - break; - - case DEFINE_COND_EXEC: -+ convert_syntax (desc, loc); - queue_pattern (desc, &define_cond_exec_tail, loc); - break; - -@@ -631,6 +1128,7 @@ process_rtx (rtx desc, file_location loc) - attr = XVEC (desc, split_code + 1); - PUT_CODE (desc, DEFINE_INSN); - XVEC (desc, 4) = attr; -+ convert_syntax (desc, loc); - - /* Queue them. */ - insn_elem = queue_pattern (desc, &define_insn_tail, loc); -diff --git a/gcc/gensupport.h b/gcc/gensupport.h -index 9a0fd7393..a19fc1319 100644 ---- a/gcc/gensupport.h -+++ b/gcc/gensupport.h -@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. 
If not see - #ifndef GCC_GENSUPPORT_H - #define GCC_GENSUPPORT_H - -+#include "hash-set.h" - #include "read-md.h" - - struct obstack; -@@ -218,6 +219,8 @@ struct pattern_stats - int num_operand_vars; - }; - -+extern hash_set compact_syntax; -+ - extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec); - extern void compute_test_codes (rtx, file_location, char *); - extern file_location get_file_location (rtx); --- -2.33.0 - diff --git a/0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch b/0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch deleted file mode 100644 index 1302ea1..0000000 --- a/0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Wed, 13 Sep 2023 14:50:30 +0100 -Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new - compact syntax - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7 - -Hi all, - -this is to add support to the new compact pattern syntax for the case -where the constraints do appear unsorted like: - -(define_insn "*si3_insn_uxtw" - [(set (match_operand:DI 0 "register_operand") - (zero_extend:DI (SHIFT_no_rotate:SI - (match_operand:SI 1 "register_operand") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))] - "" - {@ [cons: =0, 2, 1] - [ r, Uss, r] \\t%w0, %w1, %2 - [ r, r, r] \\t%w0, %w1, %w2 - } - [(set_attr "type" "bfx,shift_reg")] -) - -Best Regards - - Andrea - -gcc/Changelog - -2023-09-20 Richard Sandiford - - * gensupport.cc (convert_syntax): Updated to support unordered - constraints in compact syntax. 
---- - gcc/gensupport.cc | 32 ++++++++++++++++---------------- - 1 file changed, 16 insertions(+), 16 deletions(-) - -diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc -index 23c61dcdd..97c614850 100644 ---- a/gcc/gensupport.cc -+++ b/gcc/gensupport.cc -@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc) - - parse_section_layout (loc, &templ, "cons:", tconvec, true); - -- /* Check for any duplicate cons entries and sort based on i. */ -- for (auto e : tconvec) -- { -- unsigned idx = e.idx; -- if (idx >= convec.size ()) -- convec.resize (idx + 1); -- -- if (convec[idx].idx >= 0) -- fatal_at (loc, "duplicate cons number found: %d", idx); -- convec[idx] = e; -- } -- tconvec.clear (); -- - if (*templ != ']') - { - if (*templ == ';') -@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc) - new_templ += '\n'; - new_templ.append (buffer); - /* Parse the constraint list, then the attribute list. */ -- if (convec.size () > 0) -- parse_section (&templ, convec.size (), alt_no, convec, loc, -+ if (tconvec.size () > 0) -+ parse_section (&templ, tconvec.size (), alt_no, tconvec, loc, - "constraint"); - - if (attrvec.size () > 0) - { -- if (convec.size () > 0 && !expect_char (&templ, ';')) -+ if (tconvec.size () > 0 && !expect_char (&templ, ';')) - fatal_at (loc, "expected `;' to separate constraints " - "and attributes in alternative %d", alt_no); - -@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc) - ++alt_no; - } - -+ /* Check for any duplicate cons entries and sort based on i. */ -+ for (auto e : tconvec) -+ { -+ unsigned idx = e.idx; -+ if (idx >= convec.size ()) -+ convec.resize (idx + 1); -+ -+ if (convec[idx].idx >= 0) -+ fatal_at (loc, "duplicate cons number found: %d", idx); -+ convec[idx] = e; -+ } -+ tconvec.clear (); -+ - /* Write the constraints and attributes into their proper places. 
*/ - if (convec.size () > 0) - add_constraints (x, loc, convec); --- -2.33.0 - diff --git a/0154-Backport-SME-recog-Support-space-in-cons.patch b/0154-Backport-SME-recog-Support-space-in-cons.patch deleted file mode 100644 index f11eb41..0000000 --- a/0154-Backport-SME-recog-Support-space-in-cons.patch +++ /dev/null @@ -1,49 +0,0 @@ -From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001 -From: Andrea Corallo -Date: Fri, 15 Sep 2023 10:23:02 +0200 -Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons" - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d - -Hi all, - -this is to allow for spaces before "cons:" in the definitions of -patterns using the new compact syntax, ex: - -(define_insn "aarch64_simd_dup" - [(set (match_operand:VDQ_I 0 "register_operand") - (vec_duplicate:VDQ_I - (match_operand: 1 "register_operand")))] - "TARGET_SIMD" - {@ [ cons: =0 , 1 ; attrs: type ] - [ w , w ; neon_dup ] dup\t%0., %1.[0] - [ w , ?r ; neon_from_gp ] dup\t%0., %1 - } -) - -gcc/Changelog - -2023-09-20 Andrea Corallo - - * gensupport.cc (convert_syntax): Skip spaces before "cons:" - in new compact pattern syntax. 
---- - gcc/gensupport.cc | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc -index 97c614850..3d7a6d4fd 100644 ---- a/gcc/gensupport.cc -+++ b/gcc/gensupport.cc -@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc) - if (!expect_char (&templ, '[')) - fatal_at (loc, "expecing `[' to begin section list"); - -+ skip_spaces (&templ); -+ - parse_section_layout (loc, &templ, "cons:", tconvec, true); - - if (*templ != ']') --- -2.33.0 - diff --git a/0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch b/0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch deleted file mode 100644 index 3a47094..0000000 --- a/0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch +++ /dev/null @@ -1,164 +0,0 @@ -From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:18 +0000 -Subject: [PATCH 056/157] [Backport][SME] aarch64: Generalise - require_immediate_lane_index - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106 - -require_immediate_lane_index previously hard-coded the assumption -that the group size is determined by the argument immediately before -the index. However, for SME, there are cases where it should be -determined by an earlier argument instead. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h: - (function_checker::require_immediate_lane_index): Add an argument - for the index of the indexed vector argument. - * config/aarch64/aarch64-sve-builtins.cc - (function_checker::require_immediate_lane_index): Likewise. - * config/aarch64/aarch64-sve-builtins-shapes.cc - (ternary_bfloat_lane_base::check): Update accordingly. - (ternary_qq_lane_base::check): Likewise. - (binary_lane_def::check): Likewise. - (binary_long_lane_def::check): Likewise. - (ternary_lane_def::check): Likewise. - (ternary_lane_rotate_def::check): Likewise. - (ternary_long_lane_def::check): Likewise. 
- (ternary_qq_lane_rotate_def::check): Likewise. ---- - .../aarch64/aarch64-sve-builtins-shapes.cc | 16 ++++++++-------- - gcc/config/aarch64/aarch64-sve-builtins.cc | 18 ++++++++++++------ - gcc/config/aarch64/aarch64-sve-builtins.h | 3 ++- - 3 files changed, 22 insertions(+), 15 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index f57f92698..4fa4181b9 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (3, N); -+ return c.require_immediate_lane_index (3, 2, N); - } - }; - -@@ -956,7 +956,7 @@ struct ternary_qq_lane_base - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (3, 4); -+ return c.require_immediate_lane_index (3, 0); - } - }; - -@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0> - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (2); -+ return c.require_immediate_lane_index (2, 1); - } - }; - SHAPE (binary_lane) -@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0> - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (2); -+ return c.require_immediate_lane_index (2, 1); - } - }; - SHAPE (binary_long_lane) -@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0> - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (3); -+ return c.require_immediate_lane_index (3, 2); - } - }; - SHAPE (ternary_lane) -@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0> - bool - check (function_checker &c) const OVERRIDE - { -- return (c.require_immediate_lane_index (3, 2) -+ return 
(c.require_immediate_lane_index (3, 2, 2) - && c.require_immediate_one_of (4, 0, 90, 180, 270)); - } - }; -@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def - bool - check (function_checker &c) const OVERRIDE - { -- return c.require_immediate_lane_index (3); -+ return c.require_immediate_lane_index (3, 2); - } - }; - SHAPE (ternary_long_lane) -@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0> - bool - check (function_checker &c) const OVERRIDE - { -- return (c.require_immediate_lane_index (3, 4) -+ return (c.require_immediate_lane_index (3, 0) - && c.require_immediate_one_of (4, 0, 90, 180, 270)); - } - }; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 91af96687..7924cdf0f 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type) - return false; - } - --/* Check that argument REL_ARGNO is suitable for indexing argument -- REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts -- from the end of the predication arguments. */ -+/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into -+ groups of GROUP_SIZE elements. Return true if argument REL_ARGNO is -+ a suitable constant index for selecting one of these groups. The -+ selection happens within a 128-bit quadword, rather than the whole vector. -+ -+ REL_ARGNO and REL_VEC_ARGNO count from the end of the predication -+ arguments. */ - bool - function_checker::require_immediate_lane_index (unsigned int rel_argno, -+ unsigned int rel_vec_argno, - unsigned int group_size) - { - unsigned int argno = m_base_arg + rel_argno; - if (!argument_exists_p (argno)) - return true; - -- /* Get the type of the previous argument. tree_argument_type wants a -- 1-based number, whereas ARGNO is 0-based. 
*/ -- machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); -+ /* Get the type of the vector argument. tree_argument_type wants a -+ 1-based number, whereas VEC_ARGNO is 0-based. */ -+ unsigned int vec_argno = m_base_arg + rel_vec_argno; -+ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1)); - gcc_assert (VECTOR_MODE_P (mode)); - unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); - return require_immediate_range (rel_argno, 0, nlanes - 1); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 52994cde0..824c31cd7 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -463,7 +463,8 @@ public: - bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, - HOST_WIDE_INT); - bool require_immediate_enum (unsigned int, tree); -- bool require_immediate_lane_index (unsigned int, unsigned int = 1); -+ bool require_immediate_lane_index (unsigned int, unsigned int, -+ unsigned int = 1); - bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, - HOST_WIDE_INT, HOST_WIDE_INT); - bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); --- -2.33.0 - diff --git a/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch b/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch deleted file mode 100644 index 607c83c..0000000 --- a/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch +++ /dev/null @@ -1,469 +0,0 @@ -From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001 -From: Christophe Lyon -Date: Fri, 11 Mar 2022 16:21:02 +0000 -Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22 - -This patch updates the aarch64 backend as needed to support DFP modes -(SD, DD and TD). 
- -Changes v1->v2: - -* Drop support for DFP modes in - aarch64_gen_{load||store}[wb]_pair as these are only used in - prologue/epilogue where DFP modes are not used. Drop the - changes to the corresponding patterns in aarch64.md, and - useless GPF_PAIR iterator. - -* In aarch64_reinterpret_float_as_int, handle DDmode the same way - as DFmode (needed in case the representation of the - floating-point value can be loaded using mov/movk. - -* In aarch64_float_const_zero_rtx_p, reject constants with DFP - mode: when X is zero, the callers want to emit either '0' or - 'zr' depending on the context, which is not the way 0.0 is - represented in DFP mode (in particular fmov d0, #0 is not right - for DFP). - -* In aarch64_legitimate_constant_p, accept DFP - -2022-03-31 Christophe Lyon - - gcc/ - * config/aarch64/aarch64.cc - (aarch64_split_128bit_move): Handle DFP modes. - (aarch64_mode_valid_for_sched_fusion_p): Likewise. - (aarch64_classify_address): Likewise. - (aarch64_legitimize_address_displacement): Likewise. - (aarch64_reinterpret_float_as_int): Likewise. - (aarch64_float_const_zero_rtx_p): Likewise. - (aarch64_can_const_movi_rtx_p): Likewise. - (aarch64_anchor_offset): Likewise. - (aarch64_secondary_reload): Likewise. - (aarch64_rtx_costs): Likewise. - (aarch64_legitimate_constant_p): Likewise. - (aarch64_gimplify_va_arg_expr): Likewise. - (aapcs_vfp_sub_candidate): Likewise. - (aarch64_vfp_is_call_or_return_candidate): Likewise. - (aarch64_output_scalar_simd_mov_immediate): Likewise. - (aarch64_gen_adjusted_ldpstp): Likewise. - (aarch64_scalar_mode_supported_p): Accept DFP modes if enabled. - * config/aarch64/aarch64.md - (movsf_aarch64): Use SFD iterator and rename into - mov_aarch64. - (movdf_aarch64): Use DFD iterator and rename into - mov_aarch64. - (movtf_aarch64): Use TFD iterator and rename into - mov_aarch64. - (split pattern for move TF mode): Use TFD iterator. - * config/aarch64/iterators.md - (GPF_TF_F16_MOV): Add DFP modes. 
- (SFD, DFD, TFD): New iterators. - (GPF_TF): Add DFP modes. - (TX, DX, DX2): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++----------- - gcc/config/aarch64/aarch64.md | 34 +++++++------- - gcc/config/aarch64/iterators.md | 24 +++++++--- - 3 files changed, 89 insertions(+), 51 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 055b436b1..02210ed13 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src) - - machine_mode mode = GET_MODE (dst); - -- gcc_assert (mode == TImode || mode == TFmode); -+ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode); - gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); - gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); - -@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) - { - return mode == SImode || mode == DImode - || mode == SFmode || mode == DFmode -+ || mode == SDmode || mode == DDmode - || (aarch64_vector_mode_supported_p (mode) - && (known_eq (GET_MODE_SIZE (mode), 8) - || (known_eq (GET_MODE_SIZE (mode), 16) -@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info, - vec_flags &= ~VEC_PARTIAL; - - /* On BE, we use load/store pair for all large int mode load/stores. -- TI/TFmode may also use a load/store pair. */ -+ TI/TF/TDmode may also use a load/store pair. 
*/ - bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); - bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP - || type == ADDR_QUERY_LDP_STP_N - || mode == TImode - || mode == TFmode -+ || mode == TDmode - || (BYTES_BIG_ENDIAN && advsimd_struct_p)); - /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode - corresponds to the actual size of the memory being loaded/stored and the -@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - info->offset = op1; - info->const_offset = offset; - -- /* TImode and TFmode values are allowed in both pairs of X -+ /* TImode, TFmode and TDmode values are allowed in both pairs of X - registers and individual Q registers. The available - address modes are: - X,X: 7-bit signed scaled offset -@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - When performing the check for pairs of X registers i.e. LDP/STP - pass down DImode since that is the natural size of the LDP/STP - instruction memory accesses. */ -- if (mode == TImode || mode == TFmode) -+ if (mode == TImode || mode == TFmode || mode == TDmode) - return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) - && (aarch64_offset_9bit_signed_unscaled_p (mode, offset) - || offset_12bit_unsigned_scaled_p (mode, offset))); -@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info, - info->offset = XEXP (XEXP (x, 1), 1); - info->const_offset = offset; - -- /* TImode and TFmode values are allowed in both pairs of X -+ /* TImode, TFmode and TDmode values are allowed in both pairs of X - registers and individual Q registers. The available - address modes are: - X,X: 7-bit signed scaled offset - Q: 9-bit signed offset - We conservatively require an offset representable in either mode. 
- */ -- if (mode == TImode || mode == TFmode) -+ if (mode == TImode || mode == TFmode || mode == TDmode) - return (aarch64_offset_7bit_signed_scaled_p (mode, offset) - && aarch64_offset_9bit_signed_unscaled_p (mode, offset)); - -@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2, - offset. Use 4KB range for 1- and 2-byte accesses and a 16KB - range otherwise to increase opportunities for sharing the base - address of different sizes. Unaligned accesses use the signed -- 9-bit range, TImode/TFmode use the intersection of signed -+ 9-bit range, TImode/TFmode/TDmode use the intersection of signed - scaled 7-bit and signed 9-bit offset. */ -- if (mode == TImode || mode == TFmode) -+ if (mode == TImode || mode == TFmode || mode == TDmode) - second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100; - else if ((const_offset & (size - 1)) != 0) - second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100; -@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval) - CONST_DOUBLE_REAL_VALUE (value), - REAL_MODE_FORMAT (mode)); - -- if (mode == DFmode) -+ if (mode == DFmode || mode == DDmode) - { - int order = BYTES_BIG_ENDIAN ? 1 : 0; - ival = zext_hwi (res[order], 32); -@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x) - return false; - } - --/* Return TRUE if rtx X is immediate constant 0.0 */ -+/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal -+ Floating Point). */ - bool - aarch64_float_const_zero_rtx_p (rtx x) - { -- if (GET_MODE (x) == VOIDmode) -+ /* 0.0 in Decimal Floating Point cannot be represented by #0 or -+ zr as our callers expect, so no need to check the actual -+ value if X is of Decimal Floating Point type. 
*/ -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT) - return false; - - if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x))) -@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode) - else - return false; - -- /* use a 64 bit mode for everything except for DI/DF mode, where we use -+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use - a 128 bit vector mode. */ - int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64; - -@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size, - if (IN_RANGE (offset, -256, 0)) - return 0; - -- if (mode == TImode || mode == TFmode) -+ if (mode == TImode || mode == TFmode || mode == TDmode) - return (offset + 0x100) & ~0x1ff; - - /* Use 12-bit offset by access size. */ -@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - - /* Without the TARGET_SIMD instructions we cannot move a Q register - to a Q register directly. We need a scratch. */ -- if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x) -+ if (REG_P (x) -+ && (mode == TFmode || mode == TImode || mode == TDmode) -+ && mode == GET_MODE (x) - && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD - && reg_class_subset_p (rclass, FP_REGS)) - { -@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - return NO_REGS; - } - -- /* A TFmode or TImode memory access should be handled via an FP_REGS -+ /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS - because AArch64 has richer addressing modes for LDR/STR instructions - than LDP/STP instructions. 
*/ - if (TARGET_FLOAT && rclass == GENERAL_REGS - && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x)) - return FP_REGS; - -- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) -+ if (rclass == FP_REGS -+ && (mode == TImode || mode == TFmode || mode == TDmode) -+ && CONSTANT_P(x)) - return GENERAL_REGS; - - return NO_REGS; -@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - *cost += extra_cost->ldst.storev; - else if (GET_MODE_CLASS (mode) == MODE_INT) - *cost += extra_cost->ldst.store; -- else if (mode == SFmode) -+ else if (mode == SFmode || mode == SDmode) - *cost += extra_cost->ldst.storef; -- else if (mode == DFmode) -+ else if (mode == DFmode || mode == DDmode) - *cost += extra_cost->ldst.stored; - - *cost += -@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - /* mov[df,sf]_aarch64. */ - if (aarch64_float_const_representable_p (x)) - /* FMOV (scalar immediate). */ -- *cost += extra_cost->fp[mode == DFmode].fpconst; -+ *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst; - else if (!aarch64_float_const_zero_rtx_p (x)) - { - /* This will be a load from memory. */ -- if (mode == DFmode) -+ if (mode == DFmode || mode == DDmode) - *cost += extra_cost->ldst.loadd; - else - *cost += extra_cost->ldst.loadf; -@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - *cost += extra_cost->ldst.loadv; - else if (GET_MODE_CLASS (mode) == MODE_INT) - *cost += extra_cost->ldst.load; -- else if (mode == SFmode) -+ else if (mode == SFmode || mode == SDmode) - *cost += extra_cost->ldst.loadf; -- else if (mode == DFmode) -+ else if (mode == DFmode || mode == DDmode) - *cost += extra_cost->ldst.loadd; - - *cost += -@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) - { - /* Support CSE and rematerialization of common constants. 
*/ - if (CONST_INT_P (x) -- || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)) -+ || CONST_DOUBLE_P (x)) - return true; - - /* Only accept variable-length vector constants if they can be -@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - field_t = long_double_type_node; - field_ptr_t = long_double_ptr_type_node; - break; -+ case SDmode: -+ field_t = dfloat32_type_node; -+ field_ptr_t = build_pointer_type (dfloat32_type_node); -+ break; -+ case DDmode: -+ field_t = dfloat64_type_node; -+ field_ptr_t = build_pointer_type (dfloat64_type_node); -+ break; -+ case TDmode: -+ field_t = dfloat128_type_node; -+ field_ptr_t = build_pointer_type (dfloat128_type_node); -+ break; - case E_HFmode: - field_t = aarch64_fp16_type_node; - field_ptr_t = aarch64_fp16_ptr_type_node; -@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep, - case REAL_TYPE: - mode = TYPE_MODE (type); - if (mode != DFmode && mode != SFmode -- && mode != TFmode && mode != HFmode) -+ && mode != TFmode && mode != HFmode -+ && mode != SDmode && mode != DDmode && mode != TDmode) - return -1; - - if (*modep == VOIDmode) -@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, - machine_mode new_mode = VOIDmode; - bool composite_p = aarch64_composite_type_p (type, mode); - -- if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) -+ if ((!composite_p -+ && (GET_MODE_CLASS (mode) == MODE_FLOAT -+ || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT)) - || aarch64_short_vector_p (type, mode)) - { - *count = 1; -@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode) - } - - machine_mode vmode; -- /* use a 64 bit mode for everything except for DI/DF mode, where we use -+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use - a 128 bit vector mode. */ - int width = GET_MODE_BITSIZE (mode) == 64 ? 
128 : 64; - -@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, - base_off = (off_val_1 + off_val_3) / 2; - else - /* However, due to issues with negative LDP/STP offset generation for -- larger modes, for DF, DI and vector modes. we must not use negative -+ larger modes, for DF, DD, DI and vector modes. we must not use negative - addresses smaller than 9 signed unadjusted bits can store. This - provides the most range in this case. */ - base_off = off_val_1; -@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode) - static bool - aarch64_scalar_mode_supported_p (scalar_mode mode) - { -+ if (DECIMAL_FLOAT_MODE_P (mode)) -+ return default_decimal_float_supported_p (); -+ - return (mode == HFmode - ? true - : default_scalar_mode_supported_p (mode)); -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index a78476c8a..8757a962f 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1476,11 +1476,11 @@ - (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")] - ) - --(define_insn "*movsf_aarch64" -- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") -- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))] -- "TARGET_FLOAT && (register_operand (operands[0], SFmode) -- || aarch64_reg_or_fp_zero (operands[1], SFmode))" -+(define_insn "*mov_aarch64" -+ [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r") -+ (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))] -+ "TARGET_FLOAT && (register_operand (operands[0], mode) -+ || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - movi\\t%0.2s, #0 - fmov\\t%s0, %w1 -@@ -1500,11 +1500,11 @@ - (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] - ) - --(define_insn "*movdf_aarch64" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") -- (match_operand:DF 1 
"general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] -- "TARGET_FLOAT && (register_operand (operands[0], DFmode) -- || aarch64_reg_or_fp_zero (operands[1], DFmode))" -+(define_insn "*mov_aarch64" -+ [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") -+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] -+ "TARGET_FLOAT && (register_operand (operands[0], mode) -+ || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - movi\\t%d0, #0 - fmov\\t%d0, %x1 -@@ -1545,13 +1545,13 @@ - } - ) - --(define_insn "*movtf_aarch64" -- [(set (match_operand:TF 0 -+(define_insn "*mov_aarch64" -+ [(set (match_operand:TFD 0 - "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") -- (match_operand:TF 1 -+ (match_operand:TFD 1 - "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] -- "TARGET_FLOAT && (register_operand (operands[0], TFmode) -- || aarch64_reg_or_fp_zero (operands[1], TFmode))" -+ "TARGET_FLOAT && (register_operand (operands[0], mode) -+ || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - mov\\t%0.16b, %1.16b - # -@@ -1571,8 +1571,8 @@ - ) - - (define_split -- [(set (match_operand:TF 0 "register_operand" "") -- (match_operand:TF 1 "nonmemory_operand" ""))] -+ [(set (match_operand:TFD 0 "register_operand" "") -+ (match_operand:TFD 1 "nonmemory_operand" ""))] - "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" - [(const_int 0)] - { -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index 967e6b0b1..d0cd1b788 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -67,14 +67,24 @@ - (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) - - ;; Iterator for all scalar floating point modes suitable for moving, including --;; special BF type (HF, SF, DF, TF and BF) --(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF]) -+;; special BF type and decimal floating point types (HF, SF, DF, TF, BF, -+;; SD, DD and TD) 
-+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF SD DD TD]) -+ -+;; Iterator for scalar 32bit fp modes (SF, SD) -+(define_mode_iterator SFD [SD SF]) -+ -+;; Iterator for scalar 64bit fp modes (DF, DD) -+(define_mode_iterator DFD [DD DF]) -+ -+;; Iterator for scalar 128bit fp modes (TF, TD) -+(define_mode_iterator TFD [TD TF]) - - ;; Double vector modes. - (define_mode_iterator VDF [V2SF V4HF]) - --;; Iterator for all scalar floating point modes (SF, DF and TF) --(define_mode_iterator GPF_TF [SF DF TF]) -+;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD) -+(define_mode_iterator GPF_TF [SF DF TF SD DD TD]) - - ;; Integer Advanced SIMD modes. - (define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI]) -@@ -301,7 +311,7 @@ - ;; 2 and 4 lane SI modes. - (define_mode_iterator VS [V2SI V4SI]) - --(define_mode_iterator TX [TI TF]) -+(define_mode_iterator TX [TI TF TD]) - - ;; Advanced SIMD opaque structure modes. - (define_mode_iterator VSTRUCT [OI CI XI]) -@@ -403,10 +413,10 @@ - V4x8HF V4x4SF V4x2DF V4x8BF]) - - ;; Double scalar modes --(define_mode_iterator DX [DI DF]) -+(define_mode_iterator DX [DI DF DD]) - - ;; Duplicate of the above --(define_mode_iterator DX2 [DI DF]) -+(define_mode_iterator DX2 [DI DF DD]) - - ;; Single scalar modes - (define_mode_iterator SX [SI SF]) --- -2.33.0 - diff --git a/0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch b/0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch deleted file mode 100644 index b2257ea..0000000 --- a/0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch +++ /dev/null @@ -1,1824 +0,0 @@ -From 737d2a5f1c5e725b7e5a20075270016ebf56b44c Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 13 Sep 2022 09:28:49 +0100 -Subject: [PATCH 058/157] [Backport][SME] aarch64: Vector move fixes for - +nosimd - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=721c0fb3aca31d3bf8ad6e929eab32e29a427e60 - -This patch fixes various issues around the 
handling of vectors -and (particularly) vector structures with +nosimd. Previously, -passing and returning structures would trigger an ICE, since: - -* we didn't allow the structure modes to be stored in FPRs - -* we didn't provide +nosimd move patterns - -* splitting the moves into word-sized pieces (the default - strategy without move patterns) doesn't work because the - registers are doubleword sized. - -The patch is a bit of a hodge-podge since a lot of the handling of -moves, register costs, and register legitimacy is so interconnected. -It didn't seem feasible to split things further. - -Some notes: - -* The patch recognises vector and tuple modes based on TARGET_FLOAT - rather than TARGET_SIMD, and instead adds TARGET_SIMD to places - that really do need the vector ISA. This is necessary for the - modes to be handled correctly in register arguments and returns. - -* The 64-bit (DREG) STP peephole required TARGET_SIMD but the - LDP peephole didn't. I think the LDP one is right, since - DREG moves could involve GPRs as well as FPRs. - -* The patch keeps the existing choices of instructions for - TARGET_SIMD, just in case they happen to be better than FMOV - on some uarches. - -* Before the patch, +nosimd Q<->Q moves of 128-bit scalars went via - a GPR, thanks to a secondary reload pattern. This approach might - not be ideal, but there's no reason that 128-bit vectors should - behave differently from 128-bit scalars. The patch therefore - extends the current scalar approach to vectors. - -* Multi-vector LD1 and ST1 require TARGET_SIMD, so the TARGET_FLOAT - structure moves need to use LDP/STP and LDR/STR combinations - instead. That's also what we do for big-endian even with - TARGET_SIMD, so most of the code was already there. The patterns - for structures of 64-bit vectors are identical, but the patterns - for structures of 128-bit vectors need to cope with the lack of - 128-bit Q<->Q moves. 
- - It isn't feasible to move multi-vector tuples via GPRs, so the - patch moves them via memory instead. This contaminates the port - with its first secondary memory reload. - -gcc/ - - * config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Use - TARGET_FLOAT instead of TARGET_SIMD. - (aarch64_vectorize_related_mode): Restrict ADVSIMD handling to - TARGET_SIMD. - (aarch64_hard_regno_mode_ok): Don't allow tuples of 2 64-bit vectors - in GPRs. - (aarch64_classify_address): Treat little-endian structure moves - like big-endian for TARGET_FLOAT && !TARGET_SIMD. - (aarch64_secondary_memory_needed): New function. - (aarch64_secondary_reload): Handle 128-bit Advanced SIMD vectors - in the same way as TF, TI and TD. - (aarch64_rtx_mult_cost): Restrict ADVSIMD handling to TARGET_SIMD. - (aarch64_rtx_costs): Likewise. - (aarch64_register_move_cost): Treat a pair of 64-bit vectors - separately from a single 128-bit vector. Handle the cost implied - by aarch64_secondary_memory_needed. - (aarch64_simd_valid_immediate): Restrict ADVSIMD handling to - TARGET_SIMD. - (aarch64_expand_vec_perm_const_1): Likewise. - (TARGET_SECONDARY_MEMORY_NEEDED): New macro. - * config/aarch64/iterators.md (VTX): New iterator. - * config/aarch64/aarch64.md (arches): Add fp_q as a synonym of simd. - (arch_enabled): Adjust accordingly. - (@aarch64_reload_mov): Extend to... - (@aarch64_reload_mov): ...this. - * config/aarch64/aarch64-simd.md (mov): Require TARGET_FLOAT - rather than TARGET_SIMD. - (movmisalign): Likewise. - (load_pair): Likewise. - (vec_store_pair): Likewise. - (load_pair): Likewise. - (vec_store_pair): Likewise. - (@aarch64_split_simd_mov): Likewise. - (aarch64_get_low): Likewise. - (aarch64_get_high): Likewise. - (aarch64_get_half): Likewise. Canonicalize to a move for - lowpart extracts. - (*aarch64_simd_mov): Require TARGET_FLOAT rather than - TARGET_SIMD. Use different w<-w and r<-w instructions for - !TARGET_SIMD. 
Disable immediate moves for !TARGET_SIMD but - add an alternative specifically for w<-Z. - (*aarch64_simd_mov): Require TARGET_FLOAT rather than - TARGET_SIMD. Likewise for the associated define_splits. Disable - FPR moves and immediate moves for !TARGET_SIMD but add an alternative - specifically for w<-Z. - (aarch64_simd_mov_from_high): Require TARGET_FLOAT rather than - TARGET_SIMD. Restrict the existing alternatives to TARGET_SIMD - but add a new r<-w one for !TARGET_SIMD. - (*aarch64_get_high): New pattern. - (load_pair_lanes): Require TARGET_FLOAT rather than TARGET_SIMD. - (store_pair_lanes): Likewise. - (*aarch64_combine_internal): Likewise. Restrict existing - w<-w, w<-r and w<-m alternatives to TARGET_SIMD but add a new w<-r - alternative for !TARGET_SIMD. - (*aarch64_combine_internal_be): Likewise. - (aarch64_combinez): Require TARGET_FLOAT rather than TARGET_SIMD. - Remove bogus arch attribute. - (*aarch64_combinez_be): Likewise. - (@aarch64_vec_concat): Require TARGET_FLOAT rather than - TARGET_SIMD. - (aarch64_combine): Likewise. - (aarch64_rev_reglist): Likewise. - (mov): Likewise. - (*aarch64_be_mov): Extend to TARGET_FLOAT && - !TARGET_SIMD, regardless of endianness. Extend associated - define_splits in the same way, both for this pattern and the - ones below. - (*aarch64_be_mov): Likewise. Restrict w<-w - alternative to TARGET_SIMD. - (*aarch64_be_movoi): Likewise. - (*aarch64_be_movci): Likewise. - (*aarch64_be_movxi): Likewise. - (*aarch64_be_mov): Extend to TARGET_FLOAT - && !TARGET_SIMD, regardless of endianness. Restrict w<-w alternative - to TARGET_SIMD for tuples of 128-bit vectors. - (*aarch64_be_mov): Likewise. - * config/aarch64/aarch64-ldpstp.md: Remove TARGET_SIMD condition - from DREG STP peephole. Change TARGET_SIMD to TARGET_FLOAT in - the VQ and VP_2E LDP and STP peepholes. - -gcc/testsuite/ - * gcc.target/aarch64/ldp_stp_20.c: New test. - * gcc.target/aarch64/ldp_stp_21.c: Likewise. - * gcc.target/aarch64/ldp_stp_22.c: Likewise. 
- * gcc.target/aarch64/ldp_stp_23.c: Likewise. - * gcc.target/aarch64/ldp_stp_24.c: Likewise. - * gcc.target/aarch64/movv16qi_1.c (gpr_to_gpr): New function. - * gcc.target/aarch64/movv8qi_1.c (gpr_to_gpr): Likewise. - * gcc.target/aarch64/movv16qi_2.c: New test. - * gcc.target/aarch64/movv16qi_3.c: Likewise. - * gcc.target/aarch64/movv2di_1.c: Likewise. - * gcc.target/aarch64/movv2x16qi_1.c: Likewise. - * gcc.target/aarch64/movv2x8qi_1.c: Likewise. - * gcc.target/aarch64/movv3x16qi_1.c: Likewise. - * gcc.target/aarch64/movv3x8qi_1.c: Likewise. - * gcc.target/aarch64/movv4x16qi_1.c: Likewise. - * gcc.target/aarch64/movv4x8qi_1.c: Likewise. - * gcc.target/aarch64/movv8qi_2.c: Likewise. - * gcc.target/aarch64/movv8qi_3.c: Likewise. - * gcc.target/aarch64/vect_unary_2.c: Likewise. ---- - gcc/config/aarch64/aarch64-ldpstp.md | 11 +- - gcc/config/aarch64/aarch64-simd.md | 199 +++++++++++------- - gcc/config/aarch64/aarch64.cc | 94 ++++++--- - gcc/config/aarch64/aarch64.md | 11 +- - gcc/config/aarch64/iterators.md | 2 + - gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c | 7 + - gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 7 + - gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c | 13 ++ - gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c | 16 ++ - gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c | 16 ++ - gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 21 ++ - gcc/testsuite/gcc.target/aarch64/movv16qi_2.c | 27 +++ - gcc/testsuite/gcc.target/aarch64/movv16qi_3.c | 30 +++ - gcc/testsuite/gcc.target/aarch64/movv2di_1.c | 103 +++++++++ - .../gcc.target/aarch64/movv2x16qi_1.c | 40 ++++ - .../gcc.target/aarch64/movv2x8qi_1.c | 38 ++++ - .../gcc.target/aarch64/movv3x16qi_1.c | 44 ++++ - .../gcc.target/aarch64/movv3x8qi_1.c | 41 ++++ - .../gcc.target/aarch64/movv4x16qi_1.c | 44 ++++ - .../gcc.target/aarch64/movv4x8qi_1.c | 42 ++++ - gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 15 ++ - gcc/testsuite/gcc.target/aarch64/movv8qi_2.c | 27 +++ - gcc/testsuite/gcc.target/aarch64/movv8qi_3.c | 30 
+++ - .../gcc.target/aarch64/vect_unary_2.c | 5 + - 24 files changed, 774 insertions(+), 109 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2di_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_2.c - -diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md -index ba76a1b78..f8446e212 100644 ---- a/gcc/config/aarch64/aarch64-ldpstp.md -+++ b/gcc/config/aarch64/aarch64-ldpstp.md -@@ -83,8 +83,7 @@ - (match_operand:DREG 1 "register_operand" "")) - (set (match_operand:DREG2 2 "memory_operand" "") - (match_operand:DREG2 3 "register_operand" ""))] -- "TARGET_SIMD -- && aarch64_operands_ok_for_ldpstp (operands, false, mode)" -+ "aarch64_operands_ok_for_ldpstp (operands, false, mode)" - [(parallel [(set (match_dup 0) (match_dup 1)) - (set (match_dup 2) (match_dup 3))])] - { -@@ -96,7 +95,7 @@ - (match_operand:VQ 1 "memory_operand" "")) - (set (match_operand:VQ2 2 "register_operand" "") - (match_operand:VQ2 
3 "memory_operand" ""))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && aarch64_operands_ok_for_ldpstp (operands, true, mode) - && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" -@@ -111,7 +110,7 @@ - (match_operand:VQ 1 "register_operand" "")) - (set (match_operand:VQ2 2 "memory_operand" "") - (match_operand:VQ2 3 "register_operand" ""))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && aarch64_operands_ok_for_ldpstp (operands, false, mode) - && (aarch64_tune_params.extra_tuning_flags - & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0" -@@ -306,7 +305,7 @@ - (set (match_operand:VP_2E 6 "memory_operand" "") - (match_operand:VP_2E 7 "aarch64_reg_or_zero" "")) - (match_dup 8)] -- "TARGET_SIMD -+ "TARGET_FLOAT - && aarch64_operands_adjust_ok_for_ldpstp (operands, false, mode)" - [(const_int 0)] - { -@@ -327,7 +326,7 @@ - (set (match_operand:VP_2E 6 "register_operand" "") - (match_operand:VP_2E 7 "memory_operand" "")) - (match_dup 8)] -- "TARGET_SIMD -+ "TARGET_FLOAT - && aarch64_operands_adjust_ok_for_ldpstp (operands, true, mode)" - [(const_int 0)] - { -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index a47b39281..ef7fc4ecb 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -21,7 +21,7 @@ - (define_expand "mov" - [(set (match_operand:VALL_F16 0 "nonimmediate_operand") - (match_operand:VALL_F16 1 "general_operand"))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - " - /* Force the operand into a register if it is not an - immediate whose use can be replaced with xzr. -@@ -52,7 +52,7 @@ - (define_expand "movmisalign" - [(set (match_operand:VALL_F16 0 "nonimmediate_operand") - (match_operand:VALL_F16 1 "general_operand"))] -- "TARGET_SIMD && !STRICT_ALIGNMENT" -+ "TARGET_FLOAT && !STRICT_ALIGNMENT" - { - /* This pattern is not permitted to fail during expansion: if both arguments - are non-registers (e.g. 
memory := constant, which can be created by the -@@ -116,10 +116,10 @@ - - (define_insn "*aarch64_simd_mov" - [(set (match_operand:VDMOV 0 "nonimmediate_operand" -- "=w, m, m, w, ?r, ?w, ?r, w") -+ "=w, m, m, w, ?r, ?w, ?r, w, w") - (match_operand:VDMOV 1 "general_operand" -- "m, Dz, w, w, w, r, r, Dn"))] -- "TARGET_SIMD -+ "m, Dz, w, w, w, r, r, Dn, Dz"))] -+ "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" - { -@@ -128,26 +128,34 @@ - case 0: return "ldr\t%d0, %1"; - case 1: return "str\txzr, %0"; - case 2: return "str\t%d1, %0"; -- case 3: return "mov\t%0., %1."; -- case 4: return "umov\t%0, %1.d[0]"; -+ case 3: -+ if (TARGET_SIMD) -+ return "mov\t%0., %1."; -+ return "fmov\t%d0, %d1"; -+ case 4: -+ if (TARGET_SIMD) -+ return "umov\t%0, %1.d[0]"; -+ return "fmov\t%x0, %d1"; - case 5: return "fmov\t%d0, %1"; - case 6: return "mov\t%0, %1"; - case 7: - return aarch64_output_simd_mov_immediate (operands[1], 64); -+ case 8: return "fmov\t%d0, xzr"; - default: gcc_unreachable (); - } - } - [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\ - neon_logic, neon_to_gp, f_mcr,\ -- mov_reg, neon_move")] -+ mov_reg, neon_move, f_mcr") -+ (set_attr "arch" "*,*,*,*,*,*,*,simd,*")] - ) - - (define_insn "*aarch64_simd_mov" - [(set (match_operand:VQMOV 0 "nonimmediate_operand" -- "=w, Umn, m, w, ?r, ?w, ?r, w") -+ "=w, Umn, m, w, ?r, ?w, ?r, w, w") - (match_operand:VQMOV 1 "general_operand" -- "m, Dz, w, w, w, r, r, Dn"))] -- "TARGET_SIMD -+ "m, Dz, w, w, w, r, r, Dn, Dz"))] -+ "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" - { -@@ -167,14 +175,17 @@ - return "#"; - case 7: - return aarch64_output_simd_mov_immediate (operands[1], 128); -+ case 8: -+ return "fmov\t%d0, xzr"; - default: - gcc_unreachable (); - } - } - [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\ - neon_logic, multiple, multiple,\ -- multiple, neon_move") -- 
(set_attr "length" "4,4,4,4,8,8,8,4")] -+ multiple, neon_move, fmov") -+ (set_attr "length" "4,4,4,4,8,8,8,4,4") -+ (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")] - ) - - ;; When storing lane zero we can use the normal STR and its more permissive -@@ -195,7 +206,7 @@ - (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:DREG2 2 "register_operand" "=w") - (match_operand:DREG2 3 "memory_operand" "m"))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), -@@ -209,7 +220,7 @@ - (match_operand:DREG 1 "register_operand" "w")) - (set (match_operand:DREG2 2 "memory_operand" "=m") - (match_operand:DREG2 3 "register_operand" "w"))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), -@@ -223,7 +234,7 @@ - (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:VQ2 2 "register_operand" "=w") - (match_operand:VQ2 3 "memory_operand" "m"))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), -@@ -237,10 +248,11 @@ - (match_operand:VQ 1 "register_operand" "w")) - (set (match_operand:VQ2 2 "memory_operand" "=m") - (match_operand:VQ2 3 "register_operand" "w"))] -- "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0), -- plus_constant (Pmode, -- XEXP (operands[0], 0), -- GET_MODE_SIZE (mode)))" -+ "TARGET_FLOAT -+ && rtx_equal_p (XEXP (operands[2], 0), -+ plus_constant (Pmode, -+ XEXP (operands[0], 0), -+ GET_MODE_SIZE (mode)))" - "stp\\t%q1, %q3, %z0" - [(set_attr "type" "neon_stp_q")] - ) -@@ -248,8 +260,9 @@ - - (define_split - [(set (match_operand:VQMOV 0 "register_operand" "") -- (match_operand:VQMOV 1 "register_operand" ""))] -- "TARGET_SIMD && reload_completed -+ (match_operand:VQMOV 1 "register_operand" ""))] -+ "TARGET_FLOAT -+ && reload_completed - && GP_REGNUM_P (REGNO (operands[0])) - && GP_REGNUM_P (REGNO 
(operands[1]))" - [(const_int 0)] -@@ -261,7 +274,8 @@ - (define_split - [(set (match_operand:VQMOV 0 "register_operand" "") - (match_operand:VQMOV 1 "register_operand" ""))] -- "TARGET_SIMD && reload_completed -+ "TARGET_FLOAT -+ && reload_completed - && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) - || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" - [(const_int 0)] -@@ -273,7 +287,7 @@ - (define_expand "@aarch64_split_simd_mov" - [(set (match_operand:VQMOV 0) - (match_operand:VQMOV 1))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - rtx dst = operands[0]; - rtx src = operands[1]; -@@ -306,13 +320,20 @@ - (vec_select: - (match_operand:VQMOV 1 "register_operand") - (match_operand 2 "ascending_int_parallel")))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" -+ { -+ if (vect_par_cnst_lo_half (operands[2], mode)) -+ { -+ emit_move_insn (operands[0], gen_lowpart (mode, operands[1])); -+ DONE; -+ } -+ } - ) - - (define_expand "aarch64_get_low" - [(match_operand: 0 "register_operand") - (match_operand:VQMOV 1 "register_operand")] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - rtx lo = aarch64_simd_vect_par_cnst_half (mode, , false); - emit_insn (gen_aarch64_get_half (operands[0], operands[1], lo)); -@@ -323,7 +344,7 @@ - (define_expand "aarch64_get_high" - [(match_operand: 0 "register_operand") - (match_operand:VQMOV 1 "register_operand")] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - rtx hi = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_get_half (operands[0], operands[1], hi)); -@@ -350,15 +371,17 @@ - ) - - (define_insn "aarch64_simd_mov_from_high" -- [(set (match_operand: 0 "register_operand" "=w,?r") -+ [(set (match_operand: 0 "register_operand" "=w,?r,?r") - (vec_select: -- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") -+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") - (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - "@ -- dup\\t%d0, %1.d[1] -- 
umov\t%0, %1.d[1]" -- [(set_attr "type" "neon_dup,neon_to_gp") -+ dup\t%d0, %1.d[1] -+ umov\t%0, %1.d[1] -+ fmov\t%0, %1.d[1]" -+ [(set_attr "type" "neon_dup,neon_to_gp,f_mrc") -+ (set_attr "arch" "simd,simd,*") - (set_attr "length" "4")] - ) - -@@ -4322,12 +4345,22 @@ - [(set_attr "type" "neon_to_gp, neon_dup, neon_store1_one_lane")] - ) - -+(define_insn "*aarch64_get_high" -+ [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r") -+ (vec_select: -+ (match_operand:VQ_2E 1 "register_operand" "w") -+ (parallel [(match_operand:SI 2 "immediate_operand")])))] -+ "TARGET_FLOAT && ENDIAN_LANE_N (, INTVAL (operands[2])) == 1" -+ "fmov\t%0, %1.d[1]" -+ [(set_attr "type" "f_mrc")] -+) -+ - (define_insn "load_pair_lanes" - [(set (match_operand: 0 "register_operand" "=w") - (vec_concat: - (match_operand:VDCSIF 1 "memory_operand" "Utq") - (match_operand:VDCSIF 2 "memory_operand" "m")))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && aarch64_mergeable_load_pair_p (mode, operands[1], operands[2])" - "ldr\\t%0, %1" - [(set_attr "type" "neon_load1_1reg")] -@@ -4357,7 +4390,7 @@ - (vec_concat: - (match_operand:VDCSIF 1 "register_operand" "w, r") - (match_operand:VDCSIF 2 "register_operand" "w, r")))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - "@ - stp\t%1, %2, %y0 - stp\t%1, %2, %y0" -@@ -4372,39 +4405,44 @@ - ;; the register alternatives either don't accept or themselves disparage. 
- - (define_insn "*aarch64_combine_internal" -- [(set (match_operand: 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn") -+ [(set (match_operand: 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn") - (vec_concat: -- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r") -- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))] -- "TARGET_SIMD -+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r") -+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))] -+ "TARGET_FLOAT - && !BYTES_BIG_ENDIAN - && (register_operand (operands[0], mode) - || register_operand (operands[2], mode))" - "@ - ins\t%0.[1], %2.[0] - ins\t%0.[1], %2 -+ fmov\t%0.d[1], %2 - ld1\t{%0.}[1], %2 - stp\t%1, %2, %y0 - stp\t%1, %2, %y0" -- [(set_attr "type" "neon_ins, neon_from_gp, neon_load1_one_lane, neon_stp, store_16")] -+ [(set_attr "type" "neon_ins, neon_from_gp, f_mcr, -+ neon_load1_one_lane, neon_stp, store_16") -+ (set_attr "arch" "simd,simd,*,simd,*,*")] - ) - - (define_insn "*aarch64_combine_internal_be" -- [(set (match_operand: 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn") -+ [(set (match_operand: 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn") - (vec_concat: -- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r") -- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))] -- "TARGET_SIMD -+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r") -+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))] -+ "TARGET_FLOAT - && BYTES_BIG_ENDIAN - && (register_operand (operands[0], mode) - || register_operand (operands[2], mode))" - "@ - ins\t%0.[1], %2.[0] - ins\t%0.[1], %2 -+ fmov\t%0.d[1], %2 - ld1\t{%0.}[1], %2 - stp\t%2, %1, %y0 - stp\t%2, %1, %y0" -- [(set_attr "type" "neon_ins, neon_from_gp, neon_load1_one_lane, neon_stp, store_16")] -+ [(set_attr "type" "neon_ins, neon_from_gp, f_mcr, 
neon_load1_one_lane, neon_stp, store_16") -+ (set_attr "arch" "simd,simd,*,simd,*,*")] - ) - - ;; In this insn, operand 1 should be low, and operand 2 the high part of the -@@ -4415,13 +4453,12 @@ - (vec_concat: - (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m") - (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))] -- "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "TARGET_FLOAT && !BYTES_BIG_ENDIAN" - "@ - fmov\\t%0, %1 - fmov\t%0, %1 - ldr\\t%0, %1" -- [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg") -- (set_attr "arch" "simd,fp,simd")] -+ [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg")] - ) - - (define_insn "*aarch64_combinez_be" -@@ -4429,13 +4466,12 @@ - (vec_concat: - (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero") - (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN" -+ "TARGET_FLOAT && BYTES_BIG_ENDIAN" - "@ - fmov\\t%0, %1 - fmov\t%0, %1 - ldr\\t%0, %1" -- [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg") -- (set_attr "arch" "simd,fp,simd")] -+ [(set_attr "type" "neon_move, neon_from_gp, neon_load1_1reg")] - ) - - ;; Form a vector whose first half (in array order) comes from operand 1 -@@ -4446,7 +4482,7 @@ - (vec_concat: - (match_operand:VDCSIF 1 "general_operand") - (match_operand:VDCSIF 2 "general_operand")))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - int lo = BYTES_BIG_ENDIAN ? 2 : 1; - int hi = BYTES_BIG_ENDIAN ? 1 : 2; -@@ -4464,7 +4500,7 @@ - } - else - { -- /* Use *aarch64_combine_general. */ -+ /* Use *aarch64_combine_internal. 
*/ - operands[lo] = force_reg (mode, operands[lo]); - if (!aarch64_simd_nonimmediate_operand (operands[hi], mode)) - { -@@ -4486,7 +4522,7 @@ - [(match_operand: 0 "register_operand") - (match_operand:VDC 1 "general_operand") - (match_operand:VDC 2 "general_operand")] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - if (BYTES_BIG_ENDIAN) - std::swap (operands[1], operands[2]); -@@ -7367,7 +7403,7 @@ - (define_expand "mov" - [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand") - (match_operand:VSTRUCT_QD 1 "general_operand"))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - if (can_create_pseudo_p ()) - { -@@ -7379,7 +7415,7 @@ - (define_expand "mov" - [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") - (match_operand:VSTRUCT 1 "general_operand"))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - { - if (can_create_pseudo_p ()) - { -@@ -7559,7 +7595,8 @@ - (define_insn "*aarch64_be_mov" - [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w") - (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" - "@ -@@ -7573,7 +7610,8 @@ - (define_insn "*aarch64_be_mov" - [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w") - (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" - "@ -@@ -7581,13 +7619,15 @@ - stp\\t%q1, %R1, %0 - ldp\\t%q0, %R0, %1" - [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q") -+ (set_attr "arch" "simd,*,*") - (set_attr "length" "8,4,4")] - ) - - (define_insn "*aarch64_be_movoi" - [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w") - (match_operand:OI 1 "general_operand" " w,w,m"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) 
- && (register_operand (operands[0], OImode) - || register_operand (operands[1], OImode))" - "@ -@@ -7595,57 +7635,66 @@ - stp\\t%q1, %R1, %0 - ldp\\t%q0, %R0, %1" - [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q") -+ (set_attr "arch" "simd,*,*") - (set_attr "length" "8,4,4")] - ) - - (define_insn "*aarch64_be_mov" - [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w") - (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" - "#" - [(set_attr "type" "multiple") -+ (set_attr "arch" "fp,*,*") - (set_attr "length" "12,8,8")] - ) - - (define_insn "*aarch64_be_movci" - [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w") - (match_operand:CI 1 "general_operand" " w,w,o"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], CImode) - || register_operand (operands[1], CImode))" - "#" - [(set_attr "type" "multiple") -- (set_attr "length" "12,4,4")] -+ (set_attr "arch" "simd,*,*") -+ (set_attr "length" "12,8,8")] - ) - - (define_insn "*aarch64_be_mov" - [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w") - (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" - "#" - [(set_attr "type" "multiple") -+ (set_attr "arch" "fp,*,*") - (set_attr "length" "16,8,8")] - ) - - (define_insn "*aarch64_be_movxi" - [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w") - (match_operand:XI 1 "general_operand" " w,w,o"))] -- "TARGET_SIMD && BYTES_BIG_ENDIAN -+ "TARGET_FLOAT -+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN) - && (register_operand (operands[0], XImode) - || register_operand (operands[1], XImode))" - 
"#" - [(set_attr "type" "multiple") -- (set_attr "length" "16,4,4")] -+ (set_attr "arch" "simd,*,*") -+ (set_attr "length" "16,8,8")] - ) - - (define_split - [(set (match_operand:VSTRUCT_2QD 0 "register_operand") - (match_operand:VSTRUCT_2QD 1 "register_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - aarch64_simd_emit_reg_reg_move (operands, mode, 2); -@@ -7655,7 +7704,7 @@ - (define_split - [(set (match_operand:OI 0 "register_operand") - (match_operand:OI 1 "register_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - aarch64_simd_emit_reg_reg_move (operands, TImode, 2); -@@ -7665,7 +7714,7 @@ - (define_split - [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand") - (match_operand:VSTRUCT_3QD 1 "general_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - if (register_operand (operands[0], mode) -@@ -7674,7 +7723,7 @@ - aarch64_simd_emit_reg_reg_move (operands, mode, 3); - DONE; - } -- else if (BYTES_BIG_ENDIAN) -+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) - { - int elt_size = GET_MODE_SIZE (mode).to_constant () / ; - machine_mode pair_mode = elt_size == 16 ? 
V2x16QImode : V2x8QImode; -@@ -7701,7 +7750,7 @@ - (define_split - [(set (match_operand:CI 0 "nonimmediate_operand") - (match_operand:CI 1 "general_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - if (register_operand (operands[0], CImode) -@@ -7710,7 +7759,7 @@ - aarch64_simd_emit_reg_reg_move (operands, TImode, 3); - DONE; - } -- else if (BYTES_BIG_ENDIAN) -+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) - { - emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0), - simplify_gen_subreg (OImode, operands[1], CImode, 0)); -@@ -7729,7 +7778,7 @@ - (define_split - [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand") - (match_operand:VSTRUCT_4QD 1 "general_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - if (register_operand (operands[0], mode) -@@ -7738,7 +7787,7 @@ - aarch64_simd_emit_reg_reg_move (operands, mode, 4); - DONE; - } -- else if (BYTES_BIG_ENDIAN) -+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) - { - int elt_size = GET_MODE_SIZE (mode).to_constant () / ; - machine_mode pair_mode = elt_size == 16 ? 
V2x16QImode : V2x8QImode; -@@ -7759,7 +7808,7 @@ - (define_split - [(set (match_operand:XI 0 "nonimmediate_operand") - (match_operand:XI 1 "general_operand"))] -- "TARGET_SIMD && reload_completed" -+ "TARGET_FLOAT && reload_completed" - [(const_int 0)] - { - if (register_operand (operands[0], XImode) -@@ -7768,7 +7817,7 @@ - aarch64_simd_emit_reg_reg_move (operands, TImode, 4); - DONE; - } -- else if (BYTES_BIG_ENDIAN) -+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN) - { - emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), - simplify_gen_subreg (OImode, operands[1], XImode, 0)); -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 02210ed13..b4b646fa0 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -3748,7 +3748,7 @@ aarch64_classify_vector_mode (machine_mode mode) - case E_OImode: - case E_CImode: - case E_XImode: -- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0; -+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0; - - /* Structures of 64-bit Advanced SIMD vectors. */ - case E_V2x8QImode: -@@ -3775,7 +3775,7 @@ aarch64_classify_vector_mode (machine_mode mode) - case E_V4x4HFmode: - case E_V4x2SFmode: - case E_V4x1DFmode: -- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0; -+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0; - - /* Structures of 128-bit Advanced SIMD vectors. */ - case E_V2x16QImode: -@@ -3802,7 +3802,7 @@ aarch64_classify_vector_mode (machine_mode mode) - case E_V4x8HFmode: - case E_V4x4SFmode: - case E_V4x2DFmode: -- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0; -+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0; - - /* 64-bit Advanced SIMD vectors. */ - case E_V8QImode: -@@ -3822,7 +3822,7 @@ aarch64_classify_vector_mode (machine_mode mode) - case E_V8BFmode: - case E_V4SFmode: - case E_V2DFmode: -- return TARGET_SIMD ? VEC_ADVSIMD : 0; -+ return TARGET_FLOAT ? 
VEC_ADVSIMD : 0; - - default: - return 0; -@@ -4110,7 +4110,8 @@ aarch64_vectorize_related_mode (machine_mode vector_mode, - } - - /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */ -- if ((vec_flags & VEC_ADVSIMD) -+ if (TARGET_SIMD -+ && (vec_flags & VEC_ADVSIMD) - && known_eq (nunits, 0U) - && known_eq (GET_MODE_BITSIZE (vector_mode), 64U) - && maybe_ge (GET_MODE_BITSIZE (element_mode) -@@ -4208,7 +4209,7 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) - - if (GP_REGNUM_P (regno)) - { -- if (vec_flags & VEC_ANY_SVE) -+ if (vec_flags & (VEC_ANY_SVE | VEC_STRUCT)) - return false; - if (known_le (GET_MODE_SIZE (mode), 8)) - return true; -@@ -10884,7 +10885,8 @@ aarch64_classify_address (struct aarch64_address_info *info, - || mode == TImode - || mode == TFmode - || mode == TDmode -- || (BYTES_BIG_ENDIAN && advsimd_struct_p)); -+ || ((!TARGET_SIMD || BYTES_BIG_ENDIAN) -+ && advsimd_struct_p)); - /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode - corresponds to the actual size of the memory being loaded/stored and the - mode of the corresponding addressing mode is half of that. */ -@@ -10914,6 +10916,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - /* On LE, for AdvSIMD, don't support anything other than POST_INC or - REG addressing. */ - if (advsimd_struct_p -+ && TARGET_SIMD - && !BYTES_BIG_ENDIAN - && (code != POST_INC && code != REG)) - return false; -@@ -10976,7 +10979,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - && aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48)); - - /* A 7bit offset check because OImode will emit a ldp/stp -- instruction (only big endian will get here). -+ instruction (only !TARGET_SIMD or big endian will get here). - For ldp/stp instructions, the offset is scaled for the size of a - single element of the pair. 
*/ - if (aarch64_advsimd_partial_struct_mode_p (mode) -@@ -10987,7 +10990,8 @@ aarch64_classify_address (struct aarch64_address_info *info, - return aarch64_offset_7bit_signed_scaled_p (TImode, offset); - - /* Three 9/12 bit offsets checks because CImode will emit three -- ldr/str instructions (only big endian will get here). */ -+ ldr/str instructions (only !TARGET_SIMD or big endian will -+ get here). */ - if (aarch64_advsimd_partial_struct_mode_p (mode) - && known_eq (GET_MODE_SIZE (mode), 24)) - return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) -@@ -12716,18 +12720,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - /* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use - LDR and STR. See the comment at the head of aarch64-sve.md for - more details about the big-endian handling. */ -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); - if (reg_class_subset_p (rclass, FP_REGS) - && !((REG_P (x) && HARD_REGISTER_P (x)) - || aarch64_simd_valid_immediate (x, NULL)) -- && mode != VNx16QImode) -+ && mode != VNx16QImode -+ && (vec_flags & VEC_SVE_DATA) -+ && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN)) - { -- unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- if ((vec_flags & VEC_SVE_DATA) -- && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN)) -- { -- sri->icode = CODE_FOR_aarch64_sve_reload_mem; -- return NO_REGS; -- } -+ sri->icode = CODE_FOR_aarch64_sve_reload_mem; -+ return NO_REGS; - } - - /* If we have to disable direct literal pool loads and stores because the -@@ -12744,9 +12746,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - /* Without the TARGET_SIMD instructions we cannot move a Q register - to a Q register directly. We need a scratch. 
*/ - if (REG_P (x) -- && (mode == TFmode || mode == TImode || mode == TDmode) -+ && (mode == TFmode -+ || mode == TImode -+ || mode == TDmode -+ || (vec_flags == VEC_ADVSIMD && known_eq (GET_MODE_SIZE (mode), 16))) - && mode == GET_MODE (x) -- && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD -+ && !TARGET_SIMD -+ && FP_REGNUM_P (REGNO (x)) - && reg_class_subset_p (rclass, FP_REGS)) - { - sri->icode = code_for_aarch64_reload_mov (mode); -@@ -12768,6 +12774,28 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - return NO_REGS; - } - -+/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ -+ -+static bool -+aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1, -+ reg_class_t class2) -+{ -+ if (!TARGET_SIMD -+ && reg_classes_intersect_p (class1, FP_REGS) -+ && reg_classes_intersect_p (class2, FP_REGS)) -+ { -+ /* We can't do a 128-bit FPR-to-FPR move without TARGET_SIMD, -+ so we can't easily split a move involving tuples of 128-bit -+ vectors. Force the copy through memory instead. -+ -+ (Tuples of 64-bit vectors are fine.) */ -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) -+ return true; -+ } -+ return false; -+} -+ - static bool - aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) - { -@@ -13311,7 +13339,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) - if (VECTOR_MODE_P (mode)) - { - unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- if (vec_flags & VEC_ADVSIMD) -+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD)) - { - /* The select-operand-high-half versions of the instruction have the - same cost as the three vector version - don't add the costs of the -@@ -14257,7 +14285,7 @@ cost_minus: - { - /* SUBL2 and SUBW2. 
*/ - unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- if (vec_flags & VEC_ADVSIMD) -+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD)) - { - /* The select-operand-high-half versions of the sub instruction - have the same cost as the regular three vector version - -@@ -14359,7 +14387,7 @@ cost_plus: - { - /* ADDL2 and ADDW2. */ - unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- if (vec_flags & VEC_ADVSIMD) -+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD)) - { - /* The select-operand-high-half versions of the add instruction - have the same cost as the regular three vector version - -@@ -15284,7 +15312,9 @@ aarch64_register_move_cost (machine_mode mode, - return aarch64_register_move_cost (mode, from, GENERAL_REGS) - + aarch64_register_move_cost (mode, GENERAL_REGS, to); - -- if (known_eq (GET_MODE_SIZE (mode), 16)) -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ if (vec_flags != (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL) -+ && known_eq (GET_MODE_SIZE (mode), 16)) - { - /* 128-bit operations on general registers require 2 instructions. */ - if (from == GENERAL_REGS && to == GENERAL_REGS) -@@ -15312,6 +15342,16 @@ aarch64_register_move_cost (machine_mode mode, - else if (to == GENERAL_REGS) - return regmove_cost->FP2GP; - -+ if (!TARGET_SIMD && vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) -+ { -+ /* Needs a round-trip through memory, which can use LDP/STP for pairs. -+ The cost must be greater than 2 units to indicate that direct -+ moves aren't possible. 
*/ -+ auto per_vector = (aarch64_tune_params.memmov_cost.load_fp -+ + aarch64_tune_params.memmov_cost.store_fp); -+ return MIN (CEIL (per_vector, 2), 4); -+ } -+ - return regmove_cost->FP2FP; - } - -@@ -21504,6 +21544,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, - if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) - return false; - -+ if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD) -+ return false; -+ - if (vec_flags & VEC_SVE_PRED) - return aarch64_sve_pred_valid_immediate (op, info); - -@@ -24430,7 +24473,7 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) - std::swap (d->op0, d->op1); - } - -- if ((d->vec_flags == VEC_ADVSIMD -+ if (((d->vec_flags == VEC_ADVSIMD && TARGET_SIMD) - || d->vec_flags == VEC_SVE_DATA - || d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL) - || d->vec_flags == VEC_SVE_PRED) -@@ -27977,6 +28020,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_SECONDARY_RELOAD - #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload - -+#undef TARGET_SECONDARY_MEMORY_NEEDED -+#define TARGET_SECONDARY_MEMORY_NEEDED aarch64_secondary_memory_needed -+ - #undef TARGET_SHIFT_TRUNCATION_MASK - #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 8757a962f..c0cc91756 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -374,8 +374,11 @@ - ;; Attributes of the architecture required to support the instruction (or - ;; alternative). This attribute is used to compute attribute "enabled", use type - ;; "any" to enable an alternative in all cases. -+;; -+;; As a convenience, "fp_q" means "fp" + the ability to move between -+;; Q registers and is equivalent to "simd". 
- --(define_enum "arches" [ any rcpc8_4 fp simd sve fp16]) -+(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16]) - - (define_enum_attr "arch" "arches" (const_string "any")) - -@@ -403,7 +406,7 @@ - (and (eq_attr "arch" "fp") - (match_test "TARGET_FLOAT")) - -- (and (eq_attr "arch" "simd") -+ (and (eq_attr "arch" "fp_q, simd") - (match_test "TARGET_SIMD")) - - (and (eq_attr "arch" "fp16") -@@ -6768,8 +6771,8 @@ - ) - - (define_expand "@aarch64_reload_mov" -- [(set (match_operand:TX 0 "register_operand" "=w") -- (match_operand:TX 1 "register_operand" "w")) -+ [(set (match_operand:VTX 0 "register_operand" "=w") -+ (match_operand:VTX 1 "register_operand" "w")) - (clobber (match_operand:DI 2 "register_operand" "=&r")) - ] - "TARGET_FLOAT" -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index d0cd1b788..a8a39b65a 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -313,6 +313,8 @@ - - (define_mode_iterator TX [TI TF TD]) - -+(define_mode_iterator VTX [TI TF TD V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF]) -+ - ;; Advanced SIMD opaque structure modes. 
- (define_mode_iterator VSTRUCT [OI CI XI]) - -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c -new file mode 100644 -index 000000000..7e705e119 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c -@@ -0,0 +1,7 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#include "ldp_stp_6.c" -+ -+/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c -new file mode 100644 -index 000000000..462e3c9aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c -@@ -0,0 +1,7 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#include "ldp_stp_8.c" -+ -+/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c -new file mode 100644 -index 000000000..283c56dd2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c -@@ -0,0 +1,13 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+void -+foo (__Float32x4_t *ptr) -+{ -+ ptr[0] = ptr[2]; -+ ptr[1] = ptr[3]; -+} -+ -+/* { dg-final { scan-assembler {\tldp\tq[0-9]+, q[0-9]+} } } */ -+/* { dg-final { scan-assembler {\tstp\tq[0-9]+, q[0-9]+} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c -new file mode 100644 -index 000000000..b14976cfe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c -@@ -0,0 +1,16 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+void -+foo (char *char_ptr) -+{ -+ __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1); -+ asm volatile ("" :: -+ "w" (ptr[1]), -+ "w" (ptr[2]), -+ "w" (ptr[3]), -+ "w" (ptr[4])); -+} -+ -+/* { dg-final { scan-assembler-times {\tldp\tq[0-9]+, 
q[0-9]+} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c -new file mode 100644 -index 000000000..a99426eb2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c -@@ -0,0 +1,16 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+void -+foo (char *char_ptr) -+{ -+ __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1); -+ asm volatile ("" : -+ "=w" (ptr[1]), -+ "=w" (ptr[2]), -+ "=w" (ptr[3]), -+ "=w" (ptr[4])); -+} -+ -+/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+, q[0-9]+} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c -index 8a6afb13b..cac4241b0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c -@@ -80,3 +80,24 @@ fpr_to_gpr (v16qi q0) - x0 = q0; - asm volatile ("" :: "r" (x0)); - } -+ -+/* -+** gpr_to_gpr: -+** ( -+** mov x0, x2 -+** mov x1, x3 -+** | -+** mov x1, x3 -+** mov x0, x2 -+** ) -+** ret -+*/ -+void -+gpr_to_gpr () -+{ -+ register v16qi x0 asm ("x0"); -+ register v16qi x2 asm ("x2"); -+ asm volatile ("" : "=r" (x2)); -+ x0 = x2; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c -new file mode 100644 -index 000000000..08a0a19b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c -@@ -0,0 +1,27 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_GENERAL(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE zero_##TYPE () { return (TYPE) {}; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_GENERAL (__Int8x16_t) -+TEST_GENERAL (__Int16x8_t) -+TEST_GENERAL (__Int32x4_t) -+TEST_GENERAL (__Int64x2_t) -+TEST_GENERAL (__Bfloat16x8_t) -+TEST_GENERAL 
(__Float16x8_t) -+TEST_GENERAL (__Float32x4_t) -+TEST_GENERAL (__Float64x2_t) -+ -+__Int8x16_t const_s8x8 () { return (__Int8x16_t) { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; } -+__Int16x8_t const_s16x4 () { return (__Int16x8_t) { 1, 0, 1, 0, 1, 0, 1, 0 }; } -+__Int32x4_t const_s32x2 () { return (__Int32x4_t) { 1, 2, 3, 4 }; } -+__Int64x2_t const_s64x1 () { return (__Int64x2_t) { 100, 100 }; } -+__Float16x8_t const_f16x4 () { return (__Float16x8_t) { 2, 2, 2, 2, 2, 2, 2, 2 }; } -+__Float32x4_t const_f32x2 () { return (__Float32x4_t) { 1, 2, 1, 2 }; } -+__Float64x2_t const_f64x1 () { return (__Float64x2_t) { 32, 32 }; } -diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c -new file mode 100644 -index 000000000..d43b994c1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c -@@ -0,0 +1,30 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE \ -+ test_##TYPE (void) \ -+ { \ -+ typedef TYPE v __attribute__((aligned(1))); \ -+ register v *ptr asm ("x0"); \ -+ asm volatile ("" : "=r" (ptr)); \ -+ return *ptr; \ -+ } -+ -+TEST_VECTOR (__Int8x16_t) -+TEST_VECTOR (__Int16x8_t) -+TEST_VECTOR (__Int32x4_t) -+TEST_VECTOR (__Int64x2_t) -+TEST_VECTOR (__Bfloat16x8_t) -+TEST_VECTOR (__Float16x8_t) -+TEST_VECTOR (__Float32x4_t) -+TEST_VECTOR (__Float64x2_t) -+ -+/* -+** test___Int8x16_t: -+** ldr q0, \[x0\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv2di_1.c b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c -new file mode 100644 -index 000000000..e3b55fd52 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c -@@ -0,0 +1,103 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+nosimd+fp" -+ 
-+typedef long long v2di __attribute__((vector_size(16))); -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+v2di -+fpr_to_fpr (v2di q0, v2di q1) -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+v2di -+gpr_to_fpr () -+{ -+ register v2di x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v2di -+zero_to_fpr () -+{ -+ return (v2di) {}; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (v2di q0) -+{ -+ register v2di x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -+ -+/* -+** gpr_to_gpr: -+** ( -+** mov x0, x2 -+** mov x1, x3 -+** | -+** mov x1, x3 -+** mov x0, x2 -+** ) -+** ret -+*/ -+void -+gpr_to_gpr () -+{ -+ register v2di x0 asm ("x0"); -+ register v2di x2 asm ("x2"); -+ asm volatile ("" : "=r" (x2)); -+ x0 = x2; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c -new file mode 100644 -index 000000000..90e3b426d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE 
(TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x16x2_t) -+TEST_VECTOR (int16x8x2_t) -+TEST_VECTOR (int32x4x2_t) -+TEST_VECTOR (int64x2x2_t) -+TEST_VECTOR (float16x8x2_t) -+TEST_VECTOR (bfloat16x8x2_t) -+TEST_VECTOR (float32x4x2_t) -+TEST_VECTOR (float64x2x2_t) -+ -+/* -+** mov_int8x16x2_t: -+** sub sp, sp, #32 -+** stp q2, q3, \[sp\] -+** ldp q0, q1, \[sp\] -+** add sp, sp, #?32 -+** ret -+*/ -+/* -+** load_int8x16x2_t: -+** ldp q0, q1, \[x0\] -+** ret -+*/ -+/* -+** store_int8x16x2_t: { xfail *-*-* } -+** stp q0, q1, \[x0\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c -new file mode 100644 -index 000000000..883a0ea71 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c -@@ -0,0 +1,38 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x8x2_t) -+TEST_VECTOR (int16x4x2_t) -+TEST_VECTOR (int32x2x2_t) -+TEST_VECTOR (int64x1x2_t) -+TEST_VECTOR (float16x4x2_t) -+TEST_VECTOR (bfloat16x4x2_t) -+TEST_VECTOR (float32x2x2_t) -+TEST_VECTOR (float64x1x2_t) -+ -+/* -+** mov_int8x8x2_t: -+** fmov d0, d2 -+** fmov d1, d3 -+** ret -+*/ -+/* -+** load_int8x8x2_t: -+** ldp d0, d1, \[x0\] -+** ret -+*/ -+/* -+** store_int8x8x2_t: -+** stp d0, d1, \[x0\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c -new file mode 100644 -index 000000000..070a596bf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c -@@ -0,0 +1,44 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O 
--save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x16x3_t) -+TEST_VECTOR (int16x8x3_t) -+TEST_VECTOR (int32x4x3_t) -+TEST_VECTOR (int64x2x3_t) -+TEST_VECTOR (float16x8x3_t) -+TEST_VECTOR (bfloat16x8x3_t) -+TEST_VECTOR (float32x4x3_t) -+TEST_VECTOR (float64x2x3_t) -+ -+/* -+** mov_int8x16x3_t: -+** sub sp, sp, #48 -+** stp q3, q4, \[sp\] -+** str q5, \[sp, #?32\] -+** ldp q0, q1, \[sp\] -+** ldr q2, \[sp, #?32\] -+** add sp, sp, #?48 -+** ret -+*/ -+/* -+** load_int8x16x3_t: -+** ldp q0, q1, \[x0\] -+** ldr q2, \[x0, #?32\] -+** ret -+*/ -+/* -+** store_int8x16x3_t: { xfail *-*-* } -+** stp q0, q1, \[x0\] -+** stp q2, \[x0, #?32\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c -new file mode 100644 -index 000000000..4b873d749 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c -@@ -0,0 +1,41 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x8x3_t) -+TEST_VECTOR (int16x4x3_t) -+TEST_VECTOR (int32x2x3_t) -+TEST_VECTOR (int64x1x3_t) -+TEST_VECTOR (float16x4x3_t) -+TEST_VECTOR (bfloat16x4x3_t) -+TEST_VECTOR (float32x2x3_t) -+TEST_VECTOR (float64x1x3_t) -+ -+/* -+** mov_int8x8x3_t: -+** fmov d0, d3 -+** fmov d1, d4 -+** fmov d2, d5 -+** ret -+*/ -+/* -+** load_int8x8x3_t: -+** ldp d0, d1, \[x0\] -+** ldr d2, 
\[x0, #?16\] -+** ret -+*/ -+/* -+** store_int8x8x3_t: -+** stp d0, d1, \[x0\] -+** str d2, \[x0, #?16\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c -new file mode 100644 -index 000000000..6a517b4fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c -@@ -0,0 +1,44 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x16x4_t) -+TEST_VECTOR (int16x8x4_t) -+TEST_VECTOR (int32x4x4_t) -+TEST_VECTOR (int64x2x4_t) -+TEST_VECTOR (float16x8x4_t) -+TEST_VECTOR (bfloat16x8x4_t) -+TEST_VECTOR (float32x4x4_t) -+TEST_VECTOR (float64x2x4_t) -+ -+/* -+** mov_int8x16x4_t: -+** sub sp, sp, #64 -+** stp q4, q5, \[sp\] -+** stp q6, q7, \[sp, #?32\] -+** ldp q0, q1, \[sp\] -+** ldp q2, q3, \[sp, #?32\] -+** add sp, sp, #?64 -+** ret -+*/ -+/* -+** load_int8x16x4_t: -+** ldp q0, q1, \[x0\] -+** ldp q2, q3, \[x0, #?32\] -+** ret -+*/ -+/* -+** store_int8x16x4_t: { xfail *-*-* } -+** stp q0, q1, \[x0\] -+** stp q2, q3, \[x0, #?32\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c -new file mode 100644 -index 000000000..f096be4a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC aarch64 "arm_neon.h" -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void 
store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_VECTOR (int8x8x4_t) -+TEST_VECTOR (int16x4x4_t) -+TEST_VECTOR (int32x2x4_t) -+TEST_VECTOR (int64x1x4_t) -+TEST_VECTOR (float16x4x4_t) -+TEST_VECTOR (bfloat16x4x4_t) -+TEST_VECTOR (float32x2x4_t) -+TEST_VECTOR (float64x1x4_t) -+ -+/* -+** mov_int8x8x4_t: -+** fmov d0, d4 -+** fmov d1, d5 -+** fmov d2, d6 -+** fmov d3, d7 -+** ret -+*/ -+/* -+** load_int8x8x4_t: -+** ldp d0, d1, \[x0\] -+** ldp d2, d3, \[x0, #?16\] -+** ret -+*/ -+/* -+** store_int8x8x4_t: -+** stp d0, d1, \[x0\] -+** stp d2, d3, \[x0, #?16\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c -index 4c97e6fbc..d2b5d8025 100644 ---- a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c -@@ -53,3 +53,18 @@ fpr_to_gpr (v8qi q0) - x0 = q0; - asm volatile ("" :: "r" (x0)); - } -+ -+/* -+** gpr_to_gpr: -+** mov x0, x1 -+** ret -+*/ -+void -+gpr_to_gpr () -+{ -+ register v8qi x0 asm ("x0"); -+ register v8qi x1 asm ("x1"); -+ asm volatile ("" : "=r" (x1)); -+ x0 = x1; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c -new file mode 100644 -index 000000000..0d8576ffe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c -@@ -0,0 +1,27 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_GENERAL(TYPE) \ -+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \ -+ TYPE zero_##TYPE () { return (TYPE) {}; } \ -+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \ -+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; } -+ -+TEST_GENERAL (__Int8x8_t) -+TEST_GENERAL (__Int16x4_t) -+TEST_GENERAL (__Int32x2_t) -+TEST_GENERAL (__Int64x1_t) -+TEST_GENERAL (__Bfloat16x4_t) -+TEST_GENERAL (__Float16x4_t) -+TEST_GENERAL (__Float32x2_t) -+TEST_GENERAL (__Float64x1_t) -+ -+__Int8x8_t 
const_s8x8 () { return (__Int8x8_t) { 1, 1, 1, 1, 1, 1, 1, 1 }; } -+__Int16x4_t const_s16x4 () { return (__Int16x4_t) { 1, 0, 1, 0 }; } -+__Int32x2_t const_s32x2 () { return (__Int32x2_t) { 1, 2 }; } -+__Int64x1_t const_s64x1 () { return (__Int64x1_t) { 100 }; } -+__Float16x4_t const_f16x4 () { return (__Float16x4_t) { 2, 2, 2, 2 }; } -+__Float32x2_t const_f32x2 () { return (__Float32x2_t) { 1, 2 }; } -+__Float64x1_t const_f64x1 () { return (__Float64x1_t) { 32 }; } -diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c -new file mode 100644 -index 000000000..1caa1a788 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c -@@ -0,0 +1,30 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#define TEST_VECTOR(TYPE) \ -+ TYPE \ -+ test_##TYPE (void) \ -+ { \ -+ typedef TYPE v __attribute__((aligned(1))); \ -+ register v *ptr asm ("x0"); \ -+ asm volatile ("" : "=r" (ptr)); \ -+ return *ptr; \ -+ } -+ -+TEST_VECTOR (__Int8x8_t) -+TEST_VECTOR (__Int16x4_t) -+TEST_VECTOR (__Int32x2_t) -+TEST_VECTOR (__Int64x1_t) -+TEST_VECTOR (__Bfloat16x4_t) -+TEST_VECTOR (__Float16x4_t) -+TEST_VECTOR (__Float32x2_t) -+TEST_VECTOR (__Float64x1_t) -+ -+/* -+** test___Int8x8_t: -+** ldr d0, \[x0\] -+** ret -+*/ -diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c -new file mode 100644 -index 000000000..454ac2771 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c -@@ -0,0 +1,5 @@ -+/* { dg-options "-O3 -fno-math-errno --save-temps" } */ -+ -+#pragma GCC target "+nosimd+fp" -+ -+#include "vect_unary_1.c" --- -2.33.0 - diff --git a/0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch b/0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch deleted file mode 100644 index 6d50608..0000000 --- 
a/0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch +++ /dev/null @@ -1,213 +0,0 @@ -From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001 -From: Kyrylo Tkachov -Date: Wed, 31 May 2023 11:23:23 +0100 -Subject: [PATCH 059/157] [Backport][SME] aarch64: Simplify output template - emission code for a few patterns - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750 - -If the output code for a define_insn just does a switch (which_alternative) with no other computation we can almost always -replace it with more compact MD syntax for each alternative in a mult-alternative '@' block. -This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise. -No behavioural change intended. - -Bootstrapped and tested on aarch64-none-linux-gnu. - -gcc/ChangeLog: - - * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): Rewrite - output template to avoid explicit switch on which_alternative. - (*aarch64_simd_mov): Likewise. - (and3): Likewise. - (ior3): Likewise. - * config/aarch64/aarch64.md (*mov_aarch64): Likewise. 
---- - gcc/config/aarch64/aarch64-simd.md | 97 +++++++++--------------------- - gcc/config/aarch64/aarch64.md | 42 ++++--------- - 2 files changed, 40 insertions(+), 99 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index ef7fc4ecb..2d688edf5 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -122,28 +122,16 @@ - "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" --{ -- switch (which_alternative) -- { -- case 0: return "ldr\t%d0, %1"; -- case 1: return "str\txzr, %0"; -- case 2: return "str\t%d1, %0"; -- case 3: -- if (TARGET_SIMD) -- return "mov\t%0., %1."; -- return "fmov\t%d0, %d1"; -- case 4: -- if (TARGET_SIMD) -- return "umov\t%0, %1.d[0]"; -- return "fmov\t%x0, %d1"; -- case 5: return "fmov\t%d0, %1"; -- case 6: return "mov\t%0, %1"; -- case 7: -- return aarch64_output_simd_mov_immediate (operands[1], 64); -- case 8: return "fmov\t%d0, xzr"; -- default: gcc_unreachable (); -- } --} -+ "@ -+ ldr\t%d0, %1 -+ str\txzr, %0 -+ str\t%d1, %0 -+ * return TARGET_SIMD ? \"mov\t%0., %1.\" : \"fmov\t%d0, %d1\"; -+ * return TARGET_SIMD ? 
\"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\"; -+ fmov\t%d0, %1 -+ mov\t%0, %1 -+ * return aarch64_output_simd_mov_immediate (operands[1], 64); -+ fmov\t%d0, xzr" - [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\ - neon_logic, neon_to_gp, f_mcr,\ - mov_reg, neon_move, f_mcr") -@@ -158,29 +146,16 @@ - "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" --{ -- switch (which_alternative) -- { -- case 0: -- return "ldr\t%q0, %1"; -- case 1: -- return "stp\txzr, xzr, %0"; -- case 2: -- return "str\t%q1, %0"; -- case 3: -- return "mov\t%0., %1."; -- case 4: -- case 5: -- case 6: -- return "#"; -- case 7: -- return aarch64_output_simd_mov_immediate (operands[1], 128); -- case 8: -- return "fmov\t%d0, xzr"; -- default: -- gcc_unreachable (); -- } --} -+ "@ -+ ldr\t%q0, %1 -+ stp\txzr, xzr, %0 -+ str\t%q1, %0 -+ mov\t%0., %1. -+ # -+ # -+ # -+ * return aarch64_output_simd_mov_immediate (operands[1], 128); -+ fmov\t%d0, xzr" - [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\ - neon_logic, multiple, multiple,\ - multiple, neon_move, fmov") -@@ -1004,18 +979,10 @@ - (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") - (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))] - "TARGET_SIMD" -- { -- switch (which_alternative) -- { -- case 0: -- return "and\t%0., %1., %2."; -- case 1: -- return aarch64_output_simd_mov_immediate (operands[2], , -- AARCH64_CHECK_BIC); -- default: -- gcc_unreachable (); -- } -- } -+ "@ -+ and\t%0., %1., %2. 
-+ * return aarch64_output_simd_mov_immediate (operands[2], ,\ -+ AARCH64_CHECK_BIC);" - [(set_attr "type" "neon_logic")] - ) - -@@ -1025,18 +992,10 @@ - (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") - (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))] - "TARGET_SIMD" -- { -- switch (which_alternative) -- { -- case 0: -- return "orr\t%0., %1., %2."; -- case 1: -- return aarch64_output_simd_mov_immediate (operands[2], , -- AARCH64_CHECK_ORR); -- default: -- gcc_unreachable (); -- } -- } -+ "@ -+ orr\t%0., %1., %2. -+ * return aarch64_output_simd_mov_immediate (operands[2], ,\ -+ AARCH64_CHECK_ORR);" - [(set_attr "type" "neon_logic")] - ) - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index c0cc91756..7454a5c77 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1198,36 +1198,18 @@ - (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m,m,rZ,w,w,rZ,w"))] - "(register_operand (operands[0], mode) - || aarch64_reg_or_zero (operands[1], mode))" --{ -- switch (which_alternative) -- { -- case 0: -- return "mov\t%w0, %w1"; -- case 1: -- return "mov\t%w0, %1"; -- case 2: -- return aarch64_output_scalar_simd_mov_immediate (operands[1], -- mode); -- case 3: -- return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); -- case 4: -- return "ldr\t%w0, %1"; -- case 5: -- return "ldr\t%0, %1"; -- case 6: -- return "str\t%w1, %0"; -- case 7: -- return "str\t%1, %0"; -- case 8: -- return TARGET_SIMD ? "umov\t%w0, %1.[0]" : "fmov\t%w0, %s1"; -- case 9: -- return TARGET_SIMD ? "dup\t%0., %w1" : "fmov\t%s0, %w1"; -- case 10: -- return TARGET_SIMD ? 
"dup\t%0, %1.[0]" : "fmov\t%s0, %s1"; -- default: -- gcc_unreachable (); -- } --} -+ "@ -+ mov\t%w0, %w1 -+ mov\t%w0, %1 -+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], mode); -+ * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); -+ ldr\t%w0, %1 -+ ldr\t%0, %1 -+ str\t%w1, %0 -+ str\t%1, %0 -+ * return TARGET_SIMD ? \"umov\t%w0, %1.[0]\" : \"fmov\t%w0, %s1\"; -+ * return TARGET_SIMD ? \"dup\t%0., %w1\" : \"fmov\t%s0, %w1\"; -+ * return TARGET_SIMD ? \"dup\t%0, %1.[0]\" : \"fmov\t%s0, %s1\";" - ;; The "mov_imm" type for CNT is just a placeholder. - [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, - store_4,neon_to_gp,neon_from_gp,neon_dup") --- -2.33.0 - diff --git a/0159-Backport-SME-Improve-immediate-expansion-PR106583.patch b/0159-Backport-SME-Improve-immediate-expansion-PR106583.patch deleted file mode 100644 index 4ecbdd8..0000000 --- a/0159-Backport-SME-Improve-immediate-expansion-PR106583.patch +++ /dev/null @@ -1,631 +0,0 @@ -From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001 -From: Wilco Dijkstra -Date: Mon, 24 Oct 2022 15:14:14 +0100 -Subject: [PATCH 060/157] [Backport][SME] Improve immediate expansion - [PR106583] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d - -Improve immediate expansion of immediates which can be created from a -bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency -of bitmask checks. Move various immediate handling functions together -to avoid forward declarations. - -This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%. - -gcc/ - - PR target/106583 - * config/aarch64/aarch64.cc (aarch64_internal_mov_immediate) - Add support for a bitmask immediate with 2 MOVKs. - (aarch64_check_bitmask): New function after refactorization. - (aarch64_bitmask_imm): Simplify replication of small modes. - Split function into 64-bit only version for efficiency. 
- (aarch64_move_imm): Move near other immediate functions. - (aarch64_uimm12_shift): Likewise. - (aarch64_clamp_to_uimm12_shift): Likewise. - (aarch64_movk_shift): Likewise. - (aarch64_replicate_bitmask_imm): Likewise. - (aarch64_and_split_imm1): Likewise. - (aarch64_and_split_imm2): Likewise. - (aarch64_and_bitmask_imm): Likewise. - (aarch64_movw_imm): Likewise. - -gcc/testsuite/ - PR target/106583 - * gcc.target/aarch64/pr106583.c: Add new test. ---- - gcc/config/aarch64/aarch64.cc | 485 +++++++++++--------- - gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++ - 2 files changed, 301 insertions(+), 225 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index b4b646fa0..cf7736994 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, - static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64); - static bool aarch64_print_address_internal (FILE*, machine_mode, rtx, - aarch64_addr_query_type); --static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val); - - /* The processor for which instructions should be scheduled. */ - enum aarch64_processor aarch64_tune = cortexa53; -@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) - factor, nelts_per_vq); - } - -+/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ -+ -+static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = -+ { -+ 0x0000000100000001ull, -+ 0x0001000100010001ull, -+ 0x0101010101010101ull, -+ 0x1111111111111111ull, -+ 0x5555555555555555ull, -+ }; -+ -+ -+ -+/* Return true if 64-bit VAL is a valid bitmask immediate. 
*/ -+static bool -+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) -+{ -+ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one; -+ int bits; -+ -+ /* Check for a single sequence of one bits and return quickly if so. -+ The special cases of all ones and all zeroes returns false. */ -+ tmp = val + (val & -val); -+ -+ if (tmp == (tmp & -tmp)) -+ return (val + 1) > 1; -+ -+ /* Invert if the immediate doesn't start with a zero bit - this means we -+ only need to search for sequences of one bits. */ -+ if (val & 1) -+ val = ~val; -+ -+ /* Find the first set bit and set tmp to val with the first sequence of one -+ bits removed. Return success if there is a single sequence of ones. */ -+ first_one = val & -val; -+ tmp = val & (val + first_one); -+ -+ if (tmp == 0) -+ return true; -+ -+ /* Find the next set bit and compute the difference in bit position. */ -+ next_one = tmp & -tmp; -+ bits = clz_hwi (first_one) - clz_hwi (next_one); -+ mask = val ^ tmp; -+ -+ /* Check the bit position difference is a power of 2, and that the first -+ sequence of one bits fits within 'bits' bits. */ -+ if ((mask >> bits) != 0 || bits != (bits & -bits)) -+ return false; -+ -+ /* Check the sequence of one bits is repeated 64/bits times. */ -+ return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26]; -+} -+ -+ -+/* Return true if VAL is a valid bitmask immediate for MODE. */ -+bool -+aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) -+{ -+ if (mode == DImode) -+ return aarch64_bitmask_imm (val_in); -+ -+ unsigned HOST_WIDE_INT val = val_in; -+ -+ if (mode == SImode) -+ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); -+ -+ /* Replicate small immediates to fit 64 bits. 
*/ -+ int size = GET_MODE_UNIT_PRECISION (mode); -+ val &= (HOST_WIDE_INT_1U << size) - 1; -+ val *= bitmask_imm_mul[__builtin_clz (size) - 26]; -+ -+ return aarch64_bitmask_imm (val); -+} -+ -+ -+/* Return true if the immediate VAL can be a bitfield immediate -+ by changing the given MASK bits in VAL to zeroes, ones or bits -+ from the other half of VAL. Return the new immediate in VAL2. */ -+static inline bool -+aarch64_check_bitmask (unsigned HOST_WIDE_INT val, -+ unsigned HOST_WIDE_INT &val2, -+ unsigned HOST_WIDE_INT mask) -+{ -+ val2 = val & ~mask; -+ if (val2 != val && aarch64_bitmask_imm (val2)) -+ return true; -+ val2 = val | mask; -+ if (val2 != val && aarch64_bitmask_imm (val2)) -+ return true; -+ val = val & ~mask; -+ val2 = val | (((val >> 32) | (val << 32)) & mask); -+ if (val2 != val && aarch64_bitmask_imm (val2)) -+ return true; -+ val2 = val | (((val >> 16) | (val << 48)) & mask); -+ if (val2 != val && aarch64_bitmask_imm (val2)) -+ return true; -+ return false; -+} -+ -+ -+/* Return true if val is an immediate that can be loaded into a -+ register by a MOVZ instruction. */ -+static bool -+aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) -+{ -+ if (GET_MODE_SIZE (mode) > 4) -+ { -+ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val -+ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) -+ return 1; -+ } -+ else -+ { -+ /* Ignore sign extension. */ -+ val &= (HOST_WIDE_INT) 0xffffffff; -+ } -+ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val -+ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); -+} -+ -+ -+/* Return true if VAL is an immediate that can be loaded into a -+ register in a single instruction. 
*/ -+bool -+aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) -+{ -+ scalar_int_mode int_mode; -+ if (!is_a (mode, &int_mode)) -+ return false; -+ -+ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) -+ return 1; -+ return aarch64_bitmask_imm (val, int_mode); -+} -+ -+ - static int - aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - scalar_int_mode mode) -@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); - - /* Check if we have to emit a second instruction by checking to see -- if any of the upper 32 bits of the original DI mode value is set. */ -+ if any of the upper 32 bits of the original DI mode value is set. */ - if (val == val2) - return 1; - -@@ -5822,36 +5958,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + - ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); - -- if (zero_match != 2 && one_match != 2) -+ if (zero_match < 2 && one_match < 2) - { - /* Try emitting a bitmask immediate with a movk replacing 16 bits. - For a 64-bit bitmask try whether changing 16 bits to all ones or - zeroes creates a valid bitmask. To check any repeated bitmask, - try using 16 bits from the other 32-bit half of val. 
*/ - -- for (i = 0; i < 64; i += 16, mask <<= 16) -- { -- val2 = val & ~mask; -- if (val2 != val && aarch64_bitmask_imm (val2, mode)) -- break; -- val2 = val | mask; -- if (val2 != val && aarch64_bitmask_imm (val2, mode)) -- break; -- val2 = val2 & ~mask; -- val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask); -- if (val2 != val && aarch64_bitmask_imm (val2, mode)) -- break; -- } -- if (i != 64) -- { -- if (generate) -+ for (i = 0; i < 64; i += 16) -+ if (aarch64_check_bitmask (val, val2, mask << i)) -+ { -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ } -+ return 2; -+ } -+ } -+ -+ /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ -+ if (zero_match + one_match == 0) -+ { -+ for (i = 0; i < 48; i += 16) -+ for (int j = i + 16; j < 64; j += 16) -+ if (aarch64_check_bitmask (val, val2, (mask << i) | (mask << j))) - { -- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (j), -+ GEN_INT ((val >> j) & 0xffff))); -+ } -+ return 3; - } -- return 2; -- } - } - - /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which -@@ -5898,6 +6041,99 @@ aarch64_mov128_immediate (rtx imm) - } - - -+/* Return true if val can be encoded as a 12-bit unsigned immediate with -+ a left shift of 0 or 12 bits. */ -+bool -+aarch64_uimm12_shift (HOST_WIDE_INT val) -+{ -+ return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val -+ || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val -+ ); -+} -+ -+/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate -+ that can be created with a left shift of 0 or 12. 
*/ -+static HOST_WIDE_INT -+aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) -+{ -+ /* Check to see if the value fits in 24 bits, as that is the maximum we can -+ handle correctly. */ -+ gcc_assert ((val & 0xffffff) == val); -+ -+ if (((val & 0xfff) << 0) == val) -+ return val; -+ -+ return val & (0xfff << 12); -+} -+ -+ -+/* Test whether: -+ -+ X = (X & AND_VAL) | IOR_VAL; -+ -+ can be implemented using: -+ -+ MOVK X, #(IOR_VAL >> shift), LSL #shift -+ -+ Return the shift if so, otherwise return -1. */ -+int -+aarch64_movk_shift (const wide_int_ref &and_val, -+ const wide_int_ref &ior_val) -+{ -+ unsigned int precision = and_val.get_precision (); -+ unsigned HOST_WIDE_INT mask = 0xffff; -+ for (unsigned int shift = 0; shift < precision; shift += 16) -+ { -+ if (and_val == ~mask && (ior_val & mask) == ior_val) -+ return shift; -+ mask <<= 16; -+ } -+ return -1; -+} -+ -+/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. -+ Assumed precondition: VAL_IN Is not zero. */ -+ -+unsigned HOST_WIDE_INT -+aarch64_and_split_imm1 (HOST_WIDE_INT val_in) -+{ -+ int lowest_bit_set = ctz_hwi (val_in); -+ int highest_bit_set = floor_log2 (val_in); -+ gcc_assert (val_in != 0); -+ -+ return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - -+ (HOST_WIDE_INT_1U << lowest_bit_set)); -+} -+ -+/* Create constant where bits outside of lowest bit set to highest bit set -+ are set to 1. */ -+ -+unsigned HOST_WIDE_INT -+aarch64_and_split_imm2 (HOST_WIDE_INT val_in) -+{ -+ return val_in | ~aarch64_and_split_imm1 (val_in); -+} -+ -+/* Return true if VAL_IN is a valid 'and' bitmask immediate. 
*/ -+ -+bool -+aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) -+{ -+ scalar_int_mode int_mode; -+ if (!is_a (mode, &int_mode)) -+ return false; -+ -+ if (aarch64_bitmask_imm (val_in, int_mode)) -+ return false; -+ -+ if (aarch64_move_imm (val_in, int_mode)) -+ return false; -+ -+ unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); -+ -+ return aarch64_bitmask_imm (imm2, int_mode); -+} -+ - /* Return the number of temporary registers that aarch64_add_offset_1 - would need to add OFFSET to a register. */ - -@@ -10379,207 +10615,6 @@ aarch64_tls_referenced_p (rtx x) - } - - --/* Return true if val can be encoded as a 12-bit unsigned immediate with -- a left shift of 0 or 12 bits. */ --bool --aarch64_uimm12_shift (HOST_WIDE_INT val) --{ -- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val -- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val -- ); --} -- --/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate -- that can be created with a left shift of 0 or 12. */ --static HOST_WIDE_INT --aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) --{ -- /* Check to see if the value fits in 24 bits, as that is the maximum we can -- handle correctly. */ -- gcc_assert ((val & 0xffffff) == val); -- -- if (((val & 0xfff) << 0) == val) -- return val; -- -- return val & (0xfff << 12); --} -- --/* Return true if val is an immediate that can be loaded into a -- register by a MOVZ instruction. */ --static bool --aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) --{ -- if (GET_MODE_SIZE (mode) > 4) -- { -- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val -- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) -- return 1; -- } -- else -- { -- /* Ignore sign extension. 
*/ -- val &= (HOST_WIDE_INT) 0xffffffff; -- } -- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val -- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); --} -- --/* Test whether: -- -- X = (X & AND_VAL) | IOR_VAL; -- -- can be implemented using: -- -- MOVK X, #(IOR_VAL >> shift), LSL #shift -- -- Return the shift if so, otherwise return -1. */ --int --aarch64_movk_shift (const wide_int_ref &and_val, -- const wide_int_ref &ior_val) --{ -- unsigned int precision = and_val.get_precision (); -- unsigned HOST_WIDE_INT mask = 0xffff; -- for (unsigned int shift = 0; shift < precision; shift += 16) -- { -- if (and_val == ~mask && (ior_val & mask) == ior_val) -- return shift; -- mask <<= 16; -- } -- return -1; --} -- --/* VAL is a value with the inner mode of MODE. Replicate it to fill a -- 64-bit (DImode) integer. */ -- --static unsigned HOST_WIDE_INT --aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) --{ -- unsigned int size = GET_MODE_UNIT_PRECISION (mode); -- while (size < 64) -- { -- val &= (HOST_WIDE_INT_1U << size) - 1; -- val |= val << size; -- size *= 2; -- } -- return val; --} -- --/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ -- --static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = -- { -- 0x0000000100000001ull, -- 0x0001000100010001ull, -- 0x0101010101010101ull, -- 0x1111111111111111ull, -- 0x5555555555555555ull, -- }; -- -- --/* Return true if val is a valid bitmask immediate. */ -- --bool --aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) --{ -- unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one; -- int bits; -- -- /* Check for a single sequence of one bits and return quickly if so. -- The special cases of all ones and all zeroes returns false. */ -- val = aarch64_replicate_bitmask_imm (val_in, mode); -- tmp = val + (val & -val); -- -- if (tmp == (tmp & -tmp)) -- return (val + 1) > 1; -- -- /* Replicate 32-bit immediates so we can treat them as 64-bit. 
*/ -- if (mode == SImode) -- val = (val << 32) | (val & 0xffffffff); -- -- /* Invert if the immediate doesn't start with a zero bit - this means we -- only need to search for sequences of one bits. */ -- if (val & 1) -- val = ~val; -- -- /* Find the first set bit and set tmp to val with the first sequence of one -- bits removed. Return success if there is a single sequence of ones. */ -- first_one = val & -val; -- tmp = val & (val + first_one); -- -- if (tmp == 0) -- return true; -- -- /* Find the next set bit and compute the difference in bit position. */ -- next_one = tmp & -tmp; -- bits = clz_hwi (first_one) - clz_hwi (next_one); -- mask = val ^ tmp; -- -- /* Check the bit position difference is a power of 2, and that the first -- sequence of one bits fits within 'bits' bits. */ -- if ((mask >> bits) != 0 || bits != (bits & -bits)) -- return false; -- -- /* Check the sequence of one bits is repeated 64/bits times. */ -- return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26]; --} -- --/* Create mask of ones, covering the lowest to highest bits set in VAL_IN. -- Assumed precondition: VAL_IN Is not zero. */ -- --unsigned HOST_WIDE_INT --aarch64_and_split_imm1 (HOST_WIDE_INT val_in) --{ -- int lowest_bit_set = ctz_hwi (val_in); -- int highest_bit_set = floor_log2 (val_in); -- gcc_assert (val_in != 0); -- -- return ((HOST_WIDE_INT_UC (2) << highest_bit_set) - -- (HOST_WIDE_INT_1U << lowest_bit_set)); --} -- --/* Create constant where bits outside of lowest bit set to highest bit set -- are set to 1. */ -- --unsigned HOST_WIDE_INT --aarch64_and_split_imm2 (HOST_WIDE_INT val_in) --{ -- return val_in | ~aarch64_and_split_imm1 (val_in); --} -- --/* Return true if VAL_IN is a valid 'and' bitmask immediate. 
*/ -- --bool --aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode) --{ -- scalar_int_mode int_mode; -- if (!is_a (mode, &int_mode)) -- return false; -- -- if (aarch64_bitmask_imm (val_in, int_mode)) -- return false; -- -- if (aarch64_move_imm (val_in, int_mode)) -- return false; -- -- unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in); -- -- return aarch64_bitmask_imm (imm2, int_mode); --} -- --/* Return true if val is an immediate that can be loaded into a -- register in a single instruction. */ --bool --aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) --{ -- scalar_int_mode int_mode; -- if (!is_a (mode, &int_mode)) -- return false; -- -- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) -- return 1; -- return aarch64_bitmask_imm (val, int_mode); --} -- - static bool - aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) - { -diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c -new file mode 100644 -index 000000000..0f9315808 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c -@@ -0,0 +1,41 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+long f1 (void) -+{ -+ return 0x7efefefefefefeff; -+} -+ -+long f2 (void) -+{ -+ return 0x12345678aaaaaaaa; -+} -+ -+long f3 (void) -+{ -+ return 0x1234cccccccc5678; -+} -+ -+long f4 (void) -+{ -+ return 0x7777123456787777; -+} -+ -+long f5 (void) -+{ -+ return 0x5555555512345678; -+} -+ -+long f6 (void) -+{ -+ return 0x1234bbbb5678bbbb; -+} -+ -+long f7 (void) -+{ -+ return 0x4444123444445678; -+} -+ -+ -+/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */ -+/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */ --- -2.33.0 - diff --git a/0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch b/0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch deleted file mode 100644 index dc06698..0000000 --- 
a/0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch +++ /dev/null @@ -1,410 +0,0 @@ -From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001 -From: Wilco Dijkstra -Date: Mon, 5 Dec 2022 10:49:25 +0000 -Subject: [PATCH 061/157] [Backport][SME] AArch64: Cleanup move immediate code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1 - -Simplify, refactor and improve various move immediate functions. -Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special -cases in aarch64_internal_mov_immediate. Add new constraint so the movdi -pattern only needs a single alternative for move immediate. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type. - (aarch64_is_mov_xn_imm): New function. - (aarch64_move_imm): Refactor, assert mode is SImode or DImode. - (aarch64_internal_mov_immediate): Assert mode is SImode or DImode. - Simplify special cases. - (aarch64_uimm12_shift): Simplify code. - (aarch64_clamp_to_uimm12_shift): Likewise. - (aarch64_movw_imm): Rename to aarch64_is_movz. - (aarch64_float_const_rtx_p): Pass either SImode or DImode to - aarch64_internal_mov_immediate. - (aarch64_rtx_costs): Likewise. - * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M' - constraints into single 'O'. - (mov_aarch64): Likewise. - * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned. - (aarch64_bitmask_imm): Likewise. - (aarch64_uimm12_shift): Likewise. - (aarch64_is_mov_xn_imm): New prototype. - * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates, - limit 'N' to 64-bit only moves. 
---- - gcc/config/aarch64/aarch64-protos.h | 7 +- - gcc/config/aarch64/aarch64.cc | 158 ++++++++++++---------------- - gcc/config/aarch64/aarch64.md | 17 ++- - gcc/config/aarch64/constraints.md | 5 + - 4 files changed, 85 insertions(+), 102 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 97984f3ab..3ff1a0163 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void); - poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); - int aarch64_get_condition_code (rtx); - bool aarch64_address_valid_for_prefetch_p (rtx, bool); --bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); -+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode); - unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in); - unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); - bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); -@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); - bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); --bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); -+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); - machine_mode aarch64_sve_int_mode (machine_mode); - opt_machine_mode aarch64_sve_pred_mode (unsigned int); - machine_mode aarch64_sve_pred_mode (machine_mode); -@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool); - bool aarch64_sve_float_mul_immediate_p (rtx); - bool aarch64_split_dimode_const_store (rtx, rtx); - bool aarch64_symbolic_address_p (rtx); --bool aarch64_uimm12_shift (HOST_WIDE_INT); -+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT); - int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); -+bool aarch64_is_mov_xn_imm (unsigned 
HOST_WIDE_INT); - bool aarch64_use_return_insn_p (void); - const char *aarch64_output_casesi (rtx *); - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index cf7736994..acb659f53 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val) - - /* Return true if VAL is a valid bitmask immediate for MODE. */ - bool --aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode) -+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode) - { - if (mode == DImode) -- return aarch64_bitmask_imm (val_in); -- -- unsigned HOST_WIDE_INT val = val_in; -+ return aarch64_bitmask_imm (val); - - if (mode == SImode) - return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32)); -@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val, - } - - --/* Return true if val is an immediate that can be loaded into a -- register by a MOVZ instruction. */ --static bool --aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode) -+/* Return true if VAL is a valid MOVZ immediate. */ -+static inline bool -+aarch64_is_movz (unsigned HOST_WIDE_INT val) - { -- if (GET_MODE_SIZE (mode) > 4) -- { -- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val -- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) -- return 1; -- } -- else -- { -- /* Ignore sign extension. */ -- val &= (HOST_WIDE_INT) 0xffffffff; -- } -- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val -- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); -+ return (val >> (ctz_hwi (val) & 48)) < 65536; - } - - --/* Return true if VAL is an immediate that can be loaded into a -- register in a single instruction. */ -+/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. 
*/ - bool --aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) -+aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val) - { -- scalar_int_mode int_mode; -- if (!is_a (mode, &int_mode)) -- return false; -+ return aarch64_is_movz (val) || aarch64_is_movz (~val) -+ || aarch64_bitmask_imm (val); -+} - -- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode)) -- return 1; -- return aarch64_bitmask_imm (val, int_mode); -+ -+/* Return true if VAL is an immediate that can be created by a single -+ MOV instruction. */ -+bool -+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode) -+{ -+ gcc_assert (mode == SImode || mode == DImode); -+ -+ if (val < 65536) -+ return true; -+ -+ unsigned HOST_WIDE_INT mask = -+ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U; -+ -+ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask)) -+ return true; -+ -+ val = (val & mask) | ((val << 32) & ~mask); -+ return aarch64_bitmask_imm (val); - } - - - static int - aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, -- scalar_int_mode mode) -+ machine_mode mode) - { - int i; - unsigned HOST_WIDE_INT val, val2, mask; - int one_match, zero_match; - int num_insns; - -+ gcc_assert (mode == SImode || mode == DImode); -+ - val = INTVAL (imm); - - if (aarch64_move_imm (val, mode)) -@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - return 1; - } - -- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff -- (with XXXX non-zero). In that case check to see if the move can be done in -- a smaller mode. */ -- val2 = val & 0xffffffff; -- if (mode == DImode -- && aarch64_move_imm (val2, SImode) -- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0)) -- { -- if (generate) -- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -- -- /* Check if we have to emit a second instruction by checking to see -- if any of the upper 32 bits of the original DI mode value is set. 
*/ -- if (val == val2) -- return 1; -- -- i = (val >> 48) ? 48 : 32; -- -- if (generate) -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -- -- return 2; -- } -- - if ((val >> 32) == 0 || mode == SImode) - { - if (generate) -@@ -5958,24 +5935,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, - one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) + - ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0); - -+ /* Try a bitmask immediate and a movk to generate the immediate -+ in 2 instructions. */ -+ - if (zero_match < 2 && one_match < 2) - { -- /* Try emitting a bitmask immediate with a movk replacing 16 bits. -- For a 64-bit bitmask try whether changing 16 bits to all ones or -- zeroes creates a valid bitmask. To check any repeated bitmask, -- try using 16 bits from the other 32-bit half of val. */ -- - for (i = 0; i < 64; i += 16) -- if (aarch64_check_bitmask (val, val2, mask << i)) -- { -- if (generate) -- { -- emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -- emit_insn (gen_insv_immdi (dest, GEN_INT (i), -- GEN_INT ((val >> i) & 0xffff))); -- } -- return 2; -- } -+ { -+ if (aarch64_check_bitmask (val, val2, mask << i)) -+ break; -+ -+ val2 = val & ~(mask << i); -+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode)) -+ break; -+ } -+ -+ if (i != 64) -+ { -+ if (generate) -+ { -+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2))); -+ emit_insn (gen_insv_immdi (dest, GEN_INT (i), -+ GEN_INT ((val >> i) & 0xffff))); -+ } -+ return 2; -+ } - } - - /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */ -@@ -6044,26 +6028,24 @@ aarch64_mov128_immediate (rtx imm) - /* Return true if val can be encoded as a 12-bit unsigned immediate with - a left shift of 0 or 12 bits. 
*/ - bool --aarch64_uimm12_shift (HOST_WIDE_INT val) -+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val) - { -- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val -- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val -- ); -+ return val < 4096 || (val & 0xfff000) == val; - } - - /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate - that can be created with a left shift of 0 or 12. */ - static HOST_WIDE_INT --aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val) -+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val) - { - /* Check to see if the value fits in 24 bits, as that is the maximum we can - handle correctly. */ -- gcc_assert ((val & 0xffffff) == val); -+ gcc_assert (val < 0x1000000); - -- if (((val & 0xfff) << 0) == val) -+ if (val < 4096) - return val; - -- return val & (0xfff << 12); -+ return val & 0xfff000; - } - - -@@ -7211,8 +7193,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - return; - } - -- aarch64_internal_mov_immediate (dest, imm, true, -- as_a (mode)); -+ aarch64_internal_mov_immediate (dest, imm, true, mode); - } - - /* Return the MEM rtx that provides the canary value that should be used -@@ -11410,9 +11391,7 @@ aarch64_float_const_rtx_p (rtx x) - && SCALAR_FLOAT_MODE_P (mode) - && aarch64_reinterpret_float_as_int (x, &ival)) - { -- scalar_int_mode imode = (mode == HFmode -- ? SImode -- : int_mode_for_mode (mode).require ()); -+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode; - int num_instr = aarch64_internal_mov_immediate - (NULL_RTX, gen_int_mode (ival, imode), false, imode); - return num_instr < 3; -@@ -14049,10 +14028,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - proportionally expensive to the number of instructions - required to build that constant. This is true whether we - are compiling for SPEED or otherwise. */ -- if (!is_a (mode, &int_mode)) -- int_mode = word_mode; -+ machine_mode imode = known_le (GET_MODE_SIZE (mode), 4) -+ ? 
SImode : DImode; - *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate -- (NULL_RTX, x, false, int_mode)); -+ (NULL_RTX, x, false, imode)); - } - return true; - -@@ -14068,9 +14047,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - bool succeed = aarch64_reinterpret_float_as_int (x, &ival); - gcc_assert (succeed); - -- scalar_int_mode imode = (mode == HFmode -- ? SImode -- : int_mode_for_mode (mode).require ()); -+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) -+ ? DImode : SImode; - int ncost = aarch64_internal_mov_immediate - (NULL_RTX, gen_int_mode (ival, imode), false, imode); - *cost += COSTS_N_INSNS (ncost); -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 7454a5c77..ea94152bf 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1288,16 +1288,15 @@ - ) - - (define_insn_and_split "*movdi_aarch64" -- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") -- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))] -+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") -+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))] - "(register_operand (operands[0], DImode) - || aarch64_reg_or_zero (operands[1], DImode))" - "@ - mov\\t%x0, %x1 - mov\\t%0, %x1 - mov\\t%x0, %1 -- mov\\t%x0, %1 -- mov\\t%w0, %1 -+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; - # - * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); - ldr\\t%x0, %1 -@@ -1319,11 +1318,11 @@ - DONE; - }" - ;; The "mov_imm" type for CNTD is just a placeholder. 
-- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm, -+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, - load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, - fmov,neon_move") -- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") -- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")] -+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") -+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")] - ) - - (define_insn "insv_imm" -@@ -1487,7 +1486,7 @@ - - (define_insn "*mov_aarch64" - [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r") -- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))] -+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))] - "TARGET_FLOAT && (register_operand (operands[0], mode) - || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ -@@ -1502,7 +1501,7 @@ - ldr\\t%x0, %1 - str\\t%x1, %0 - mov\\t%x0, %x1 -- mov\\t%x0, %1" -+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";" - [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ - f_loadd,f_stored,load_8,store_8,mov_reg,\ - fconstd") -diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md -index ee7587cca..750a42fb1 100644 ---- a/gcc/config/aarch64/constraints.md -+++ b/gcc/config/aarch64/constraints.md -@@ -106,6 +106,11 @@ - - (define_constraint "N" - "A constant that can be used with a 64-bit MOV immediate operation." -+ (and (match_code "const_int") -+ (match_test "aarch64_is_mov_xn_imm (ival)"))) -+ -+(define_constraint "O" -+ "A constant that can be used with a 32 or 64-bit MOV immediate operation." 
- (and (match_code "const_int") - (match_test "aarch64_move_imm (ival, DImode)"))) - --- -2.33.0 - diff --git a/0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch b/0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch deleted file mode 100644 index 6ccc4c3..0000000 --- a/0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch +++ /dev/null @@ -1,229 +0,0 @@ -From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Mon, 19 Jun 2023 15:55:28 +0100 -Subject: [PATCH 062/157] [Backport][SME] AArch64: convert some patterns to - compact MD syntax - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113 - -Hi All, - -This converts some patterns in the AArch64 backend to use the new -compact syntax. - -gcc/ChangeLog: - - * config/aarch64/aarch64.md (arches): Add nosimd. - (*mov_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to - compact syntax. ---- - gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------ - 1 file changed, 78 insertions(+), 83 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index ea94152bf..5d02da42f 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -378,7 +378,7 @@ - ;; As a convenience, "fp_q" means "fp" + the ability to move between - ;; Q registers and is equivalent to "simd". 
- --(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16]) -+(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16]) - - (define_enum_attr "arch" "arches" (const_string "any")) - -@@ -409,6 +409,9 @@ - (and (eq_attr "arch" "fp_q, simd") - (match_test "TARGET_SIMD")) - -+ (and (eq_attr "arch" "nosimd") -+ (match_test "!TARGET_SIMD")) -+ - (and (eq_attr "arch" "fp16") - (match_test "TARGET_FP_F16INST")) - -@@ -1194,26 +1197,27 @@ - ) - - (define_insn "*mov_aarch64" -- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") -- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D,Usv,m,m,rZ,w,w,rZ,w"))] -+ [(set (match_operand:SHORT 0 "nonimmediate_operand") -+ (match_operand:SHORT 1 "aarch64_mov_operand"))] - "(register_operand (operands[0], mode) - || aarch64_reg_or_zero (operands[1], mode))" -- "@ -- mov\t%w0, %w1 -- mov\t%w0, %1 -- * return aarch64_output_scalar_simd_mov_immediate (operands[1], mode); -- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); -- ldr\t%w0, %1 -- ldr\t%0, %1 -- str\t%w1, %0 -- str\t%1, %0 -- * return TARGET_SIMD ? \"umov\t%w0, %1.[0]\" : \"fmov\t%w0, %s1\"; -- * return TARGET_SIMD ? \"dup\t%0., %w1\" : \"fmov\t%s0, %w1\"; -- * return TARGET_SIMD ? \"dup\t%0, %1.[0]\" : \"fmov\t%s0, %s1\";" -- ;; The "mov_imm" type for CNT is just a placeholder. -- [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, -- store_4,neon_to_gp,neon_from_gp,neon_dup") -- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")] -+ {@ [cons: =0, 1; attrs: type, arch] -+ [r, r ; mov_reg , * ] mov\t%w0, %w1 -+ [r, M ; mov_imm , * ] mov\t%w0, %1 -+ [w, D; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], mode); -+ /* The "mov_imm" type for CNT is just a placeholder. 
*/ -+ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r, m ; load_4 , * ] ldr\t%w0, %1 -+ [w, m ; load_4 , * ] ldr\t%0, %1 -+ [m, r Z ; store_4 , * ] str\\t%w1, %0 -+ [m, w ; store_4 , * ] str\t%1, %0 -+ [r, w ; neon_to_gp , simd ] umov\t%w0, %1.[0] -+ [r, w ; neon_to_gp , nosimd] fmov\t%w0, %s1 /*foo */ -+ [w, r Z ; neon_from_gp, simd ] dup\t%0., %w1 -+ [w, r Z ; neon_from_gp, nosimd] fmov\t%s0, %w1 -+ [w, w ; neon_dup , simd ] dup\t%0, %1.[0] -+ [w, w ; neon_dup , nosimd] fmov\t%s0, %s1 -+ } - ) - - (define_expand "mov" -@@ -1250,79 +1254,70 @@ - ) - - (define_insn_and_split "*movsi_aarch64" -- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w") -- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))] -+ [(set (match_operand:SI 0 "nonimmediate_operand") -+ (match_operand:SI 1 "aarch64_mov_operand"))] - "(register_operand (operands[0], SImode) - || aarch64_reg_or_zero (operands[1], SImode))" -- "@ -- mov\\t%w0, %w1 -- mov\\t%w0, %w1 -- mov\\t%w0, %w1 -- mov\\t%w0, %1 -- # -- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); -- ldr\\t%w0, %1 -- ldr\\t%s0, %1 -- str\\t%w1, %0 -- str\\t%s1, %0 -- adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1] -- adr\\t%x0, %c1 -- adrp\\t%x0, %A1 -- fmov\\t%s0, %w1 -- fmov\\t%w0, %s1 -- fmov\\t%s0, %s1 -- * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" -+ {@ [cons: =0, 1; attrs: type, arch, length] -+ [r k, r ; mov_reg , * , 4] mov\t%w0, %w1 -+ [r , k ; mov_reg , * , 4] ^ -+ [r , M ; mov_imm , * , 4] mov\t%w0, %1 -+ [r , n ; mov_imm , * ,16] # -+ /* The "mov_imm" type for CNT is just a placeholder. 
*/ -+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r , m ; load_4 , * , 4] ldr\t%w0, %1 -+ [w , m ; load_4 , fp , 4] ldr\t%s0, %1 -+ [m , r Z; store_4 , * , 4] str\t%w1, %0 -+ [m , w ; store_4 , fp , 4] str\t%s1, %0 -+ [r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1] -+ [r , Usa; adr , * , 4] adr\t%x0, %c1 -+ [r , Ush; adr , * , 4] adrp\t%x0, %A1 -+ [w , r Z; f_mcr , fp , 4] fmov\t%s0, %w1 -+ [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 -+ [w , w ; fmov , fp , 4] fmov\t%s0, %s1 -+ [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); -+ } - "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode) - && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" -- [(const_int 0)] -- "{ -- aarch64_expand_mov_immediate (operands[0], operands[1]); -- DONE; -- }" -- ;; The "mov_imm" type for CNT is just a placeholder. -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, -- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") -- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") -- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") --] -+ [(const_int 0)] -+ { -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ } - ) - - (define_insn_and_split "*movdi_aarch64" -- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w") -- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))] -+ [(set (match_operand:DI 0 "nonimmediate_operand") -+ (match_operand:DI 1 "aarch64_mov_operand"))] - "(register_operand (operands[0], DImode) - || aarch64_reg_or_zero (operands[1], DImode))" -- "@ -- mov\\t%x0, %x1 -- mov\\t%0, %x1 -- mov\\t%x0, %1 -- * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? 
\"mov\\t%x0, %1\" : \"mov\\t%w0, %1\"; -- # -- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); -- ldr\\t%x0, %1 -- ldr\\t%d0, %1 -- str\\t%x1, %0 -- str\\t%d1, %0 -- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\"; -- adr\\t%x0, %c1 -- adrp\\t%x0, %A1 -- fmov\\t%d0, %x1 -- fmov\\t%x0, %d1 -- fmov\\t%d0, %d1 -- * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);" -- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode) -- && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" -- [(const_int 0)] -- "{ -- aarch64_expand_mov_immediate (operands[0], operands[1]); -- DONE; -- }" -- ;; The "mov_imm" type for CNTD is just a placeholder. -- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm, -- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc, -- fmov,neon_move") -- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") -- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")] -+ {@ [cons: =0, 1; attrs: type, arch, length] -+ [r, r ; mov_reg , * , 4] mov\t%x0, %x1 -+ [k, r ; mov_reg , * , 4] mov\t%0, %x1 -+ [r, k ; mov_reg , * , 4] mov\t%x0, %1 -+ [r, O ; mov_imm , * , 4] << aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? "mov\t%x0, %1" : "mov\t%w0, %1"; -+ [r, n ; mov_imm , * ,16] # -+ /* The "mov_imm" type for CNT is just a placeholder. */ -+ [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r, m ; load_8 , * , 4] ldr\t%x0, %1 -+ [w, m ; load_8 , fp , 4] ldr\t%d0, %1 -+ [m, r Z; store_8 , * , 4] str\t%x1, %0 -+ [m, w ; store_8 , fp , 4] str\t%d1, %0 -+ [r, Usw; load_8 , * , 8] << TARGET_ILP32 ? 
"adrp\t%0, %A1;ldr\t%w0, [%0, %L1]" : "adrp\t%0, %A1;ldr\t%0, [%0, %L1]"; -+ [r, Usa; adr , * , 4] adr\t%x0, %c1 -+ [r, Ush; adr , * , 4] adrp\t%x0, %A1 -+ [w, r Z; f_mcr , fp , 4] fmov\t%d0, %x1 -+ [r, w ; f_mrc , fp , 4] fmov\t%x0, %d1 -+ [w, w ; fmov , fp , 4] fmov\t%d0, %d1 -+ [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode); -+ } -+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode) -+ && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" -+ [(const_int 0)] -+ { -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ } - ) - - (define_insn "insv_imm" --- -2.33.0 - diff --git a/0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch b/0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch deleted file mode 100644 index f92df2d..0000000 --- a/0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch +++ /dev/null @@ -1,792 +0,0 @@ -From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:18 +0000 -Subject: [PATCH 063/157] [Backport][SME] aarch64: Use SVE's RDVL instruction - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0 - -We didn't previously use SVE's RDVL instruction, since the CNT* -forms are preferred and provide most of the range. However, -there are some cases that RDVL can handle and CNT* can't, -and using RDVL-like instructions becomes important for SME. - -gcc/ - * config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p) - (aarch64_output_sve_rdvl): Declare. - * config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New - function, split out from... - (aarch64_sve_cnt_immediate_p): ...here. - (aarch64_sve_rdvl_factor_p): New function. - (aarch64_sve_rdvl_immediate_p): Likewise. - (aarch64_output_sve_rdvl): Likewise. - (aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL - for some cases. 
- (aarch64_expand_mov_immediate): Handle RDVL immediates. - (aarch64_mov_operand_p): Likewise. - * config/aarch64/constraints.md (Usr): New constraint. - * config/aarch64/aarch64.md (*mov_aarch64): Add an RDVL - alternative. - (*movsi_aarch64, *movdi_aarch64): Likewise. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output. - * gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise. - * gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used - to calculate the -17 and 17 factors. - * gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor. ---- - gcc/config/aarch64/aarch64-protos.h | 2 + - gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------ - gcc/config/aarch64/aarch64.md | 3 + - gcc/config/aarch64/constraints.md | 6 + - .../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++-- - .../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +- - .../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +- - .../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +- - .../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +- - .../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +- - .../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +- - .../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +- - .../gcc.target/aarch64/sve/loop_add_4.c | 6 +- - .../aarch64/sve/pcs/stack_clash_1.c | 3 +- - 14 files changed, 225 insertions(+), 123 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 3ff1a0163..14a568140 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode); - HOST_WIDE_INT aarch64_fold_sve_cnt_pat 
(aarch64_svpattern, unsigned int); - bool aarch64_sve_cnt_immediate_p (rtx); - bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); -+bool aarch64_sve_rdvl_immediate_p (rtx); - bool aarch64_sve_addvl_addpl_immediate_p (rtx); - bool aarch64_sve_vector_inc_dec_immediate_p (rtx); - int aarch64_add_offset_temporaries (rtx); -@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *); - char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); - char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); - char *aarch64_output_sve_scalar_inc_dec (rtx); -+char *aarch64_output_sve_rdvl (rtx); - char *aarch64_output_sve_addvl_addpl (rtx); - char *aarch64_output_sve_vector_inc_dec (const char *, rtx); - char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index acb659f53..4194dfc70 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) - return -1; - } - -+/* Return true if a single CNT[BHWD] instruction can multiply FACTOR -+ by the number of 128-bit quadwords in an SVE vector. */ -+ -+static bool -+aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor) -+{ -+ /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */ -+ return (IN_RANGE (factor, 2, 16 * 16) -+ && (factor & 1) == 0 -+ && factor <= 16 * (factor & -factor)); -+} -+ - /* Return true if we can move VALUE into a register using a single - CNT[BHWD] instruction. */ - -@@ -5527,11 +5539,7 @@ static bool - aarch64_sve_cnt_immediate_p (poly_int64 value) - { - HOST_WIDE_INT factor = value.coeffs[0]; -- /* The coefficient must be [1, 16] * {2, 4, 8, 16}. 
*/ -- return (value.coeffs[1] == factor -- && IN_RANGE (factor, 2, 16 * 16) -- && (factor & 1) == 0 -- && factor <= 16 * (factor & -factor)); -+ return value.coeffs[1] == factor && aarch64_sve_cnt_factor_p (factor); - } - - /* Likewise for rtx X. */ -@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) - -offset_value.coeffs[1], 0); - } - -+/* Return true if a single RDVL instruction can multiply FACTOR by the -+ number of 128-bit quadwords in an SVE vector. */ -+ -+static bool -+aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor) -+{ -+ return (multiple_p (factor, 16) -+ && IN_RANGE (factor, -32 * 16, 31 * 16)); -+} -+ -+/* Return true if we can move VALUE into a register using a single -+ RDVL instruction. */ -+ -+static bool -+aarch64_sve_rdvl_immediate_p (poly_int64 value) -+{ -+ HOST_WIDE_INT factor = value.coeffs[0]; -+ return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor); -+} -+ -+/* Likewise for rtx X. */ -+ -+bool -+aarch64_sve_rdvl_immediate_p (rtx x) -+{ -+ poly_int64 value; -+ return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value); -+} -+ -+/* Return the asm string for moving RDVL immediate OFFSET into register -+ operand 0. */ -+ -+char * -+aarch64_output_sve_rdvl (rtx offset) -+{ -+ static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)]; -+ poly_int64 offset_value = rtx_to_poly_int64 (offset); -+ gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value)); -+ -+ int factor = offset_value.coeffs[1]; -+ snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16); -+ return buffer; -+} -+ - /* Return true if we can add VALUE to a register using a single ADDVL - or ADDPL instruction. */ - -@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset) - count += 1; - else if (factor != 0) - { -- factor = abs (factor); -- if (factor > 16 * (factor & -factor)) -- /* Need one register for the CNT result and one for the multiplication -- factor. 
If necessary, the second temporary can be reused for the -- constant part of the offset. */ -+ factor /= (HOST_WIDE_INT) least_bit_hwi (factor); -+ if (!IN_RANGE (factor, -32, 31)) -+ /* Need one register for the CNT or RDVL result and one for the -+ multiplication factor. If necessary, the second temporary -+ can be reused for the constant part of the offset. */ - return 2; -- /* Need one register for the CNT result (which might then -+ /* Need one register for the CNT or RDVL result (which might then - be shifted). */ - count += 1; - } -@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - /* Otherwise use a CNT-based sequence. */ - else if (factor != 0) - { -- /* Use a subtraction if we have a negative factor. */ -- rtx_code code = PLUS; -- if (factor < 0) -- { -- factor = -factor; -- code = MINUS; -- } -+ /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT, -+ with negative shifts indicating a shift right. */ -+ HOST_WIDE_INT low_bit = least_bit_hwi (factor); -+ HOST_WIDE_INT rel_factor = factor / low_bit; -+ int shift = exact_log2 (low_bit) - 4; -+ gcc_assert (shift >= -4 && (rel_factor & 1) != 0); -+ -+ /* Set CODE, VAL and SHIFT so that [+-] VAL * 2**SHIFT is -+ equal to CNTB * FACTOR / 16, with CODE being the [+-]. - -- /* Calculate CNTD * FACTOR / 2. First try to fold the division -- into the multiplication. */ -+ We can avoid a multiplication if REL_FACTOR is in the range -+ of RDVL, although there are then various optimizations that -+ we can try on top. */ -+ rtx_code code = PLUS; - rtx val; -- int shift = 0; -- if (factor & 1) -- /* Use a right shift by 1. */ -- shift = -1; -- else -- factor /= 2; -- HOST_WIDE_INT low_bit = factor & -factor; -- if (factor <= 16 * low_bit) -+ if (IN_RANGE (rel_factor, -32, 31)) - { -- if (factor > 16 * 8) -+ /* Try to use an unshifted CNT[BHWD] or RDVL. 
*/ -+ if (aarch64_sve_cnt_factor_p (factor) -+ || aarch64_sve_rdvl_factor_p (factor)) -+ { -+ val = gen_int_mode (poly_int64 (factor, factor), mode); -+ shift = 0; -+ } -+ /* Try to subtract an unshifted CNT[BHWD]. */ -+ else if (aarch64_sve_cnt_factor_p (-factor)) - { -- /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate -- the value with the minimum multiplier and shift it into -- position. */ -- int extra_shift = exact_log2 (low_bit); -- shift += extra_shift; -- factor >>= extra_shift; -+ code = MINUS; -+ val = gen_int_mode (poly_int64 (-factor, -factor), mode); -+ shift = 0; - } -- val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode); -+ /* If subtraction is free, prefer to load a positive constant. -+ In the best case this will fit a shifted CNTB. */ -+ else if (src != const0_rtx && rel_factor < 0) -+ { -+ code = MINUS; -+ val = gen_int_mode (-rel_factor * BYTES_PER_SVE_VECTOR, mode); -+ } -+ /* Otherwise use a shifted RDVL or CNT[BHWD]. */ -+ else -+ val = gen_int_mode (rel_factor * BYTES_PER_SVE_VECTOR, mode); - } - else - { -- /* Base the factor on LOW_BIT if we can calculate LOW_BIT -- directly, since that should increase the chances of being -- able to use a shift and add sequence. If LOW_BIT itself -- is out of range, just use CNTD. */ -- if (low_bit <= 16 * 8) -- factor /= low_bit; -+ /* If we can calculate CNTB << SHIFT directly, prefer to do that, -+ since it should increase the chances of being able to use -+ a shift and add sequence for the multiplication. -+ If CNTB << SHIFT is out of range, stick with the current -+ shift factor. 
*/ -+ if (IN_RANGE (low_bit, 2, 16 * 16)) -+ { -+ val = gen_int_mode (poly_int64 (low_bit, low_bit), mode); -+ shift = 0; -+ } - else -- low_bit = 1; -+ val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode); - -- val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); - val = aarch64_force_temporary (mode, temp1, val); - -+ /* Prefer to multiply by a positive factor and subtract rather -+ than multiply by a negative factor and add, since positive -+ values are usually easier to move. */ -+ if (rel_factor < 0 && src != const0_rtx) -+ { -+ rel_factor = -rel_factor; -+ code = MINUS; -+ } -+ - if (can_create_pseudo_p ()) - { -- rtx coeff1 = gen_int_mode (factor, mode); -+ rtx coeff1 = gen_int_mode (rel_factor, mode); - val = expand_mult (mode, val, coeff1, NULL_RTX, true, true); - } - else - { -- /* Go back to using a negative multiplication factor if we have -- no register from which to subtract. */ -- if (code == MINUS && src == const0_rtx) -- { -- factor = -factor; -- code = PLUS; -- } -- rtx coeff1 = gen_int_mode (factor, mode); -+ rtx coeff1 = gen_int_mode (rel_factor, mode); - coeff1 = aarch64_force_temporary (mode, temp2, coeff1); - val = gen_rtx_MULT (mode, val, coeff1); - } - } - -+ /* Multiply by 2 ** SHIFT. */ - if (shift > 0) - { -- /* Multiply by 1 << SHIFT. */ - val = aarch64_force_temporary (mode, temp1, val); - val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift)); - } -- else if (shift == -1) -+ else if (shift < 0) - { -- /* Divide by 2. */ - val = aarch64_force_temporary (mode, temp1, val); -- val = gen_rtx_ASHIFTRT (mode, val, const1_rtx); -+ val = gen_rtx_ASHIFTRT (mode, val, GEN_INT (-shift)); - } - -- /* Calculate SRC +/- CNTD * FACTOR / 2. */ -+ /* Add the result to SRC or subtract the result from SRC. 
*/ - if (src != const0_rtx) - { - val = aarch64_force_temporary (mode, temp1, val); -@@ -7045,7 +7112,9 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - aarch64_report_sve_required (); - return; - } -- if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset)) -+ if (base == const0_rtx -+ && (aarch64_sve_cnt_immediate_p (offset) -+ || aarch64_sve_rdvl_immediate_p (offset))) - emit_insn (gen_rtx_SET (dest, imm)); - else - { -@@ -21751,7 +21820,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) - if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x)) - return true; - -- if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x)) -+ if (TARGET_SVE -+ && (aarch64_sve_cnt_immediate_p (x) -+ || aarch64_sve_rdvl_immediate_p (x))) - return true; - - return aarch64_classify_symbolic_expression (x) -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 5d02da42f..c0977a3da 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1207,6 +1207,7 @@ - [w, D; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], mode); - /* The "mov_imm" type for CNT is just a placeholder. */ - [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]); - [r, m ; load_4 , * ] ldr\t%w0, %1 - [w, m ; load_4 , * ] ldr\t%0, %1 - [m, r Z ; store_4 , * ] str\\t%w1, %0 -@@ -1265,6 +1266,7 @@ - [r , n ; mov_imm , * ,16] # - /* The "mov_imm" type for CNT is just a placeholder. */ - [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); - [r , m ; load_4 , * , 4] ldr\t%w0, %1 - [w , m ; load_4 , fp , 4] ldr\t%s0, %1 - [m , r Z; store_4 , * , 4] str\t%w1, %0 -@@ -1299,6 +1301,7 @@ - [r, n ; mov_imm , * ,16] # - /* The "mov_imm" type for CNT is just a placeholder. 
*/ - [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); - [r, m ; load_8 , * , 4] ldr\t%x0, %1 - [w, m ; load_8 , fp , 4] ldr\t%d0, %1 - [m, r Z; store_8 , * , 4] str\t%x1, %0 -diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md -index 750a42fb1..212a73416 100644 ---- a/gcc/config/aarch64/constraints.md -+++ b/gcc/config/aarch64/constraints.md -@@ -214,6 +214,12 @@ - (and (match_code "const_int") - (match_test "aarch64_high_bits_all_ones_p (ival)"))) - -+(define_constraint "Usr" -+ "@internal -+ A constraint that matches a value produced by RDVL." -+ (and (match_code "const_poly_int") -+ (match_test "aarch64_sve_rdvl_immediate_p (op)"))) -+ - (define_constraint "Usv" - "@internal - A constraint that matches a VG-based constant that can be loaded by -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c -index 8b8fe8e4f..a22d8a28d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c -@@ -51,19 +51,24 @@ PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; } - */ - PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; } - --/* Other sequences would be OK. 
*/ - /* - ** cntb_17: --** cntb x0, all, mul #16 --** incb x0 -+** rdvl x0, #17 - ** ret - */ - PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; } - -+/* -+** cntb_31: -+** rdvl x0, #31 -+** ret -+*/ -+PROTO (cntb_31, uint64_t, ()) { return svcntb () * 31; } -+ - /* - ** cntb_32: --** cntd (x[0-9]+) --** lsl x0, \1, 8 -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 - ** ret - */ - PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; } -@@ -80,16 +85,16 @@ PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; } - - /* - ** cntb_64: --** cntd (x[0-9]+) --** lsl x0, \1, 9 -+** cntb (x[0-9]+) -+** lsl x0, \1, 6 - ** ret - */ - PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; } - - /* - ** cntb_128: --** cntd (x[0-9]+) --** lsl x0, \1, 10 -+** cntb (x[0-9]+) -+** lsl x0, \1, 7 - ** ret - */ - PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; } -@@ -106,46 +111,70 @@ PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; } - - /* - ** cntb_m1: --** cntb (x[0-9]+) --** neg x0, \1 -+** rdvl x0, #-1 - ** ret - */ - PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); } - - /* - ** cntb_m13: --** cntb (x[0-9]+), all, mul #13 --** neg x0, \1 -+** rdvl x0, #-13 - ** ret - */ - PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; } - - /* - ** cntb_m15: --** cntb (x[0-9]+), all, mul #15 --** neg x0, \1 -+** rdvl x0, #-15 - ** ret - */ - PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; } - - /* - ** cntb_m16: --** cntb (x[0-9]+), all, mul #16 --** neg x0, \1 -+** rdvl x0, #-16 - ** ret - */ - PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; } - --/* Other sequences would be OK. 
*/ - /* - ** cntb_m17: --** cntb x0, all, mul #16 --** incb x0 --** neg x0, x0 -+** rdvl x0, #-17 - ** ret - */ - PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; } - -+/* -+** cntb_m32: -+** rdvl x0, #-32 -+** ret -+*/ -+PROTO (cntb_m32, uint64_t, ()) { return -svcntb () * 32; } -+ -+/* -+** cntb_m33: -+** rdvl x0, #-32 -+** decb x0 -+** ret -+*/ -+PROTO (cntb_m33, uint64_t, ()) { return -svcntb () * 33; } -+ -+/* -+** cntb_m34: -+** rdvl (x[0-9]+), #-17 -+** lsl x0, \1, #?1 -+** ret -+*/ -+PROTO (cntb_m34, uint64_t, ()) { return -svcntb () * 34; } -+ -+/* -+** cntb_m64: -+** rdvl (x[0-9]+), #-1 -+** lsl x0, \1, #?6 -+** ret -+*/ -+PROTO (cntb_m64, uint64_t, ()) { return -svcntb () * 64; } -+ - /* - ** incb_1: - ** incb x0 -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c -index 0d0ed4849..090a643b4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c -@@ -54,8 +54,8 @@ PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; } - /* Other sequences would be OK. */ - /* - ** cntd_17: --** cntb x0, all, mul #2 --** incd x0 -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 3 - ** ret - */ - PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; } -@@ -107,8 +107,7 @@ PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; } - - /* - ** cntd_m16: --** cntb (x[0-9]+), all, mul #2 --** neg x0, \1 -+** rdvl x0, #-2 - ** ret - */ - PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; } -@@ -116,9 +115,8 @@ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; } - /* Other sequences would be OK. 
*/ - /* - ** cntd_m17: --** cntb x0, all, mul #2 --** incd x0 --** neg x0, x0 -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 3 - ** ret - */ - PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c -index c29930f15..1a4e7dc0e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c -@@ -54,8 +54,8 @@ PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; } - /* Other sequences would be OK. */ - /* - ** cnth_17: --** cntb x0, all, mul #8 --** inch x0 -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 1 - ** ret - */ - PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; } -@@ -69,16 +69,16 @@ PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; } - - /* - ** cnth_64: --** cntd (x[0-9]+) --** lsl x0, \1, 8 -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 - ** ret - */ - PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; } - - /* - ** cnth_128: --** cntd (x[0-9]+) --** lsl x0, \1, 9 -+** cntb (x[0-9]+) -+** lsl x0, \1, 6 - ** ret - */ - PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; } -@@ -109,8 +109,7 @@ PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; } - - /* - ** cnth_m16: --** cntb (x[0-9]+), all, mul #8 --** neg x0, \1 -+** rdvl x0, #-8 - ** ret - */ - PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; } -@@ -118,9 +117,8 @@ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; } - /* Other sequences would be OK. 
*/ - /* - ** cnth_m17: --** cntb x0, all, mul #8 --** inch x0 --** neg x0, x0 -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 1 - ** ret - */ - PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c -index e26cc67a4..9d1697690 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c -@@ -54,8 +54,8 @@ PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; } - /* Other sequences would be OK. */ - /* - ** cntw_17: --** cntb x0, all, mul #4 --** incw x0 -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 2 - ** ret - */ - PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; } -@@ -76,8 +76,8 @@ PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; } - - /* - ** cntw_128: --** cntd (x[0-9]+) --** lsl x0, \1, 8 -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 - ** ret - */ - PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; } -@@ -108,8 +108,7 @@ PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; } - - /* - ** cntw_m16: --** cntb (x[0-9]+), all, mul #4 --** neg x0, \1 -+** rdvl (x[0-9]+), #-4 - ** ret - */ - PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; } -@@ -117,9 +116,8 @@ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; } - /* Other sequences would be OK. 
*/ - /* - ** cntw_m17: --** cntb x0, all, mul #4 --** incw x0 --** neg x0, x0 -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 2 - ** ret - */ - PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c -index c90730a03..94cd3a066 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c -@@ -218,8 +218,8 @@ TEST_PREFETCH (prfb_vnum_31, uint16_t, - - /* - ** prfb_vnum_32: --** cntd (x[0-9]+) --** lsl (x[0-9]+), \1, #?8 -+** cntb (x[0-9]+) -+** lsl (x[0-9]+), \1, #?5 - ** add (x[0-9]+), (\2, x0|x0, \2) - ** prfb pldl1keep, p0, \[\3\] - ** ret -@@ -240,7 +240,7 @@ TEST_PREFETCH (prfb_vnum_m32, uint16_t, - /* - ** prfb_vnum_m33: - ** ... --** prfb pldl1keep, p0, \[x[0-9]+\] -+** prfb pldl1keep, p0, \[x[0-9]+(, x[0-9]+)?\] - ** ret - */ - TEST_PREFETCH (prfb_vnum_m33, uint16_t, -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c -index 869ef3d3e..b7a116cf0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c -@@ -218,8 +218,8 @@ TEST_PREFETCH (prfd_vnum_31, uint16_t, - - /* - ** prfd_vnum_32: --** cntd (x[0-9]+) --** lsl (x[0-9]+), \1, #?8 -+** cntb (x[0-9]+) -+** lsl (x[0-9]+), \1, #?5 - ** add (x[0-9]+), (\2, x0|x0, \2) - ** prfd pldl1keep, p0, \[\3\] - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c -index 45a735eae..9d3df6bd3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c -@@ -218,8 +218,8 @@ TEST_PREFETCH (prfh_vnum_31, uint16_t, - - /* - ** prfh_vnum_32: --** cntd (x[0-9]+) --** lsl (x[0-9]+), \1, #?8 -+** cntb (x[0-9]+) -+** lsl (x[0-9]+), \1, #?5 - ** add (x[0-9]+), (\2, x0|x0, 
\2) - ** prfh pldl1keep, p0, \[\3\] - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c -index 444187f45..6962abab6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c -@@ -218,8 +218,8 @@ TEST_PREFETCH (prfw_vnum_31, uint16_t, - - /* - ** prfw_vnum_32: --** cntd (x[0-9]+) --** lsl (x[0-9]+), \1, #?8 -+** cntb (x[0-9]+) -+** lsl (x[0-9]+), \1, #?5 - ** add (x[0-9]+), (\2, x0|x0, \2) - ** prfw pldl1keep, p0, \[\3\] - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -index 9ead9c21b..7f02497e8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -@@ -68,8 +68,7 @@ TEST_ALL (LOOP) - /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ - /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ --/* 2 for the calculations of -17 and 17. */ --/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */ -+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */ - - /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ -@@ -86,8 +85,7 @@ TEST_ALL (LOOP) - /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ - /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ --/* 2 for the calculations of -17 and 17. 
*/ --/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */ -+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */ - - /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c -index 110947a6c..5de34fc61 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c -@@ -6,8 +6,7 @@ - - /* - ** test_1: --** cntd x12, all, mul #9 --** lsl x12, x12, #?4 -+** rdvl x12, #18 - ** mov x11, sp - ** ... - ** sub sp, sp, x12 --- -2.33.0 - diff --git a/0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch b/0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch deleted file mode 100644 index 97108d8..0000000 --- a/0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch +++ /dev/null @@ -1,137 +0,0 @@ -From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:19 +0000 -Subject: [PATCH 064/157] [Backport][SME] aarch64: Make AARCH64_FL_SVE - requirements explicit - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148 - -So far, all intrinsics covered by the aarch64-sve-builtins* -framework have (naturally enough) required at least SVE. -However, arm_sme.h defines a couple of intrinsics that can -be called by any code. It's therefore necessary to make -the implicit SVE requirement explicit. - -gcc/ - * config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove - implied requirement on SVE. - * config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE. - * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. 
---- - .../aarch64/aarch64-sve-builtins-base.def | 10 +++++----- - .../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++----- - gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- - 3 files changed, 19 insertions(+), 11 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def -index ffdf7cb4c..3a58f76c3 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def -@@ -17,7 +17,7 @@ - along with GCC; see the file COPYING3. If not see - . */ - --#define REQUIRED_EXTENSIONS 0 -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE - DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) - DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) -@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none) - DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16 - DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) - DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) - DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) -@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) - DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM - DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) - DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) - DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) -@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) - DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) - #undef REQUIRED_EXTENSIONS - --#define 
REQUIRED_EXTENSIONS AARCH64_FL_F32MM -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM - DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM - DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) - DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) - DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -index 635089ffc..d5f23a887 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -@@ -17,7 +17,7 @@ - along with GCC; see the file COPYING3. If not see - . */ - --#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2 -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2 - DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none) - DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none) - DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none) -@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none) - DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES) -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_SVE2 \ -+ | AARCH64_FL_SVE2_AES) - DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) - DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) - DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) -@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none) - DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM) -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_SVE2 \ -+ | AARCH64_FL_SVE2_BITPERM) - DEF_SVE_FUNCTION 
(svbdep, binary_opt_n, all_unsigned, none) - DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) - DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3) -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_SVE2 \ -+ | AARCH64_FL_SVE2_SHA3) - DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4) -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_SVE2 \ -+ | AARCH64_FL_SVE2_SM4) - DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) - DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) - #undef REQUIRED_EXTENSIONS -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 7924cdf0f..dde01f676 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -525,7 +525,7 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; - static CONSTEXPR const function_group_info function_groups[] = { - #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ - { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ -- REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, -+ REQUIRED_EXTENSIONS }, - #include "aarch64-sve-builtins.def" - }; - --- -2.33.0 - diff --git a/0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch b/0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch deleted file mode 100644 index 4497814..0000000 --- a/0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch +++ /dev/null @@ -1,562 +0,0 @@ -From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:19 +0000 -Subject: [PATCH 065/157] [Backport][SME] aarch64: Add group suffixes to SVE - intrinsics - -Reference: 
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d - -The SME2 ACLE adds a new "group" suffix component to the naming -convention for SVE intrinsics. This is also used in the new tuple -forms of the svreinterpret intrinsics. - -This patch adds support for group suffixes and defines the -x2, x3 and x4 suffixes that are needed for the svreinterprets. - -gcc/ - * config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take - a group suffix index parameter. - (build_32_64, build_all): Update accordingly. Iterate over all - group suffixes. - * config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold) - (svqshl_impl::fold, svrshl_impl::fold): Update function_instance - constructors. - * config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array. - (groups_none): New constant. - (function_groups): Initialize the groups field. - (function_instance::hash): Hash the group index. - (function_builder::get_name): Add the group suffix. - (function_builder::add_overloaded_functions): Iterate over all - group suffixes. - (function_resolver::lookup_form): Take a group suffix parameter. - (function_resolver::resolve_to): Likewise. - * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New - macro. - (x2, x3, x4): New group suffixes. - * config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum. - (group_suffix_info): New structure. - (function_group_info::groups): New member variable. - (function_instance::group_suffix_id): Likewise. - (group_suffixes): New array. - (function_instance::operator==): Compare the group suffixes. - (function_instance::group_suffix): New function. 
---- - .../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------ - .../aarch64/aarch64-sve-builtins-sve2.cc | 10 +-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------ - gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++ - gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++---- - 5 files changed, 165 insertions(+), 72 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index 4fa4181b9..3ecef026c 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format, - } - - /* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, -- the type suffixes at index TI and the predication suffix at index PI. -- The other arguments are as for build_all. */ -+ the type suffixes at index TI, the group suffixes at index GI, and the -+ predication suffix at index PI. The other arguments are as for -+ build_all. */ - static void - build_one (function_builder &b, const char *signature, - const function_group_info &group, mode_suffix_index mode_suffix_id, -- unsigned int ti, unsigned int pi, bool force_direct_overloads) -+ unsigned int ti, unsigned int gi, unsigned int pi, -+ bool force_direct_overloads) - { - /* Byte forms of svdupq take 16 arguments. 
*/ - auto_vec argument_types; - function_instance instance (group.base_name, *group.base, *group.shape, - mode_suffix_id, group.types[ti], -- group.preds[pi]); -+ group.groups[gi], group.preds[pi]); - tree return_type = parse_signature (instance, signature, argument_types); - apply_predication (instance, return_type, argument_types); - b.add_unique_function (instance, return_type, argument_types, -@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature, - mode_suffix_index mode64, bool force_direct_overloads = false) - { - for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -- if (group.types[0][0] == NUM_TYPE_SUFFIXES) -- { -- gcc_assert (mode32 != MODE_none && mode64 != MODE_none); -- build_one (b, signature, group, mode32, 0, pi, -- force_direct_overloads); -- build_one (b, signature, group, mode64, 0, pi, -- force_direct_overloads); -- } -- else -- for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) -+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi) -+ if (group.types[0][0] == NUM_TYPE_SUFFIXES) - { -- unsigned int bits = type_suffixes[group.types[ti][0]].element_bits; -- gcc_assert (bits == 32 || bits == 64); -- mode_suffix_index mode = bits == 32 ? mode32 : mode64; -- if (mode != MODE_none) -- build_one (b, signature, group, mode, ti, pi, -- force_direct_overloads); -+ gcc_assert (mode32 != MODE_none && mode64 != MODE_none); -+ build_one (b, signature, group, mode32, 0, gi, pi, -+ force_direct_overloads); -+ build_one (b, signature, group, mode64, 0, gi, pi, -+ force_direct_overloads); - } -+ else -+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; -+ ++ti) -+ { -+ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits; -+ gcc_assert (bits == 32 || bits == 64); -+ mode_suffix_index mode = bits == 32 ? 
mode32 : mode64; -+ if (mode != MODE_none) -+ build_one (b, signature, group, mode, ti, gi, pi, -+ force_direct_overloads); -+ } - } - - /* For every type and predicate combination in GROUP, add one function -@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature, - bool force_direct_overloads = false) - { - for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -- for (unsigned int ti = 0; -- ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) -- build_one (b, signature, group, mode_suffix_id, ti, pi, -- force_direct_overloads); -+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi) -+ for (unsigned int ti = 0; -+ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) -+ build_one (b, signature, group, mode_suffix_id, ti, gi, pi, -+ force_direct_overloads); - } - - /* TYPE is the largest type suffix associated with the arguments of R, -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc -index e066f096d..a94e5e269 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc -@@ -252,7 +252,7 @@ public: - that we can use for sensible shift amounts. */ - function_instance instance ("svqshl", functions::svqshl, - shapes::binary_int_opt_n, MODE_n, -- f.type_suffix_ids, f.pred); -+ f.type_suffix_ids, GROUP_none, f.pred); - return f.redirect_call (instance); - } - else -@@ -261,7 +261,7 @@ public: - that we can use for sensible shift amounts. 
*/ - function_instance instance ("svrshl", functions::svrshl, - shapes::binary_int_opt_n, MODE_n, -- f.type_suffix_ids, f.pred); -+ f.type_suffix_ids, GROUP_none, f.pred); - return f.redirect_call (instance); - } - } -@@ -290,7 +290,7 @@ public: - -wi::to_wide (amount)); - function_instance instance ("svasr", functions::svasr, - shapes::binary_uint_opt_n, MODE_n, -- f.type_suffix_ids, f.pred); -+ f.type_suffix_ids, GROUP_none, f.pred); - if (f.type_suffix (0).unsigned_p) - { - instance.base_name = "svlsr"; -@@ -322,7 +322,7 @@ public: - that we can use for sensible shift amounts. */ - function_instance instance ("svlsl", functions::svlsl, - shapes::binary_uint_opt_n, MODE_n, -- f.type_suffix_ids, f.pred); -+ f.type_suffix_ids, GROUP_none, f.pred); - gcall *call = as_a (f.redirect_call (instance)); - gimple_call_set_arg (call, 2, amount); - return call; -@@ -335,7 +335,7 @@ public: - -wi::to_wide (amount)); - function_instance instance ("svrshr", functions::svrshr, - shapes::shift_right_imm, MODE_n, -- f.type_suffix_ids, f.pred); -+ f.type_suffix_ids, GROUP_none, f.pred); - gcall *call = as_a (f.redirect_call (instance)); - gimple_call_set_arg (call, 2, amount); - return call; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index dde01f676..dc3fd80da 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { - 0, VOIDmode } - }; - -+CONSTEXPR const group_suffix_info group_suffixes[] = { -+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \ -+ { "_" #NAME, VG, VECTORS_PER_TUPLE }, -+#include "aarch64-sve-builtins.def" -+ { "", 0, 1 } -+}; -+ - /* Define a TYPES_ macro for each combination of type - suffixes that an ACLE function can have, where is the - name used in DEF_SVE_FUNCTION entries. 
-@@ -483,6 +490,10 @@ DEF_SVE_TYPES_ARRAY (inc_dec_n); - DEF_SVE_TYPES_ARRAY (reinterpret); - DEF_SVE_TYPES_ARRAY (while); - -+static const group_suffix_index groups_none[] = { -+ GROUP_none, NUM_GROUP_SUFFIXES -+}; -+ - /* Used by functions that have no governing predicate. */ - static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; - -@@ -524,8 +535,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; - /* A list of all SVE ACLE functions. */ - static CONSTEXPR const function_group_info function_groups[] = { - #define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ -- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ -- REQUIRED_EXTENSIONS }, -+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \ -+ preds_##PREDS, REQUIRED_EXTENSIONS }, - #include "aarch64-sve-builtins.def" - }; - -@@ -788,6 +799,7 @@ function_instance::hash () const - h.add_int (mode_suffix_id); - h.add_int (type_suffix_ids[0]); - h.add_int (type_suffix_ids[1]); -+ h.add_int (group_suffix_id); - h.add_int (pred); - return h.end (); - } -@@ -957,6 +969,8 @@ function_builder::get_name (const function_instance &instance, - for (unsigned int i = 0; i < 2; ++i) - if (!overloaded_p || instance.shape->explicit_type_suffix_p (i)) - append_name (instance.type_suffix (i).string); -+ if (!overloaded_p || instance.shape->explicit_group_suffix_p ()) -+ append_name (instance.group_suffix ().string); - append_name (pred_suffixes[instance.pred]); - return finish_name (); - } -@@ -1113,19 +1127,26 @@ void - function_builder::add_overloaded_functions (const function_group_info &group, - mode_suffix_index mode) - { -- unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0); -- unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1); -- for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -+ bool explicit_type0 = (*group.shape)->explicit_type_suffix_p (0); -+ bool explicit_type1 = 
(*group.shape)->explicit_type_suffix_p (1); -+ bool explicit_group = (*group.shape)->explicit_group_suffix_p (); -+ auto add_function = [&](const type_suffix_pair &types, -+ group_suffix_index group_suffix_id, -+ unsigned int pi) -+ { -+ function_instance instance (group.base_name, *group.base, -+ *group.shape, mode, types, -+ group_suffix_id, group.preds[pi]); -+ add_overloaded_function (instance, group.required_extensions); -+ }; -+ -+ auto add_group_suffix = [&](group_suffix_index group_suffix_id, -+ unsigned int pi) - { - if (!explicit_type0 && !explicit_type1) -- { -- /* Deal with the common case in which there is one overloaded -- function for all type combinations. */ -- function_instance instance (group.base_name, *group.base, -- *group.shape, mode, types_none[0], -- group.preds[pi]); -- add_overloaded_function (instance, group.required_extensions); -- } -+ /* Deal with the common case in which there is one overloaded -+ function for all type combinations. */ -+ add_function (types_none[0], group_suffix_id, pi); - else - for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; - ++ti) -@@ -1136,12 +1157,16 @@ function_builder::add_overloaded_functions (const function_group_info &group, - explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES, - explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES - }; -- function_instance instance (group.base_name, *group.base, -- *group.shape, mode, types, -- group.preds[pi]); -- add_overloaded_function (instance, group.required_extensions); -+ add_function (types, group_suffix_id, pi); - } -- } -+ }; -+ -+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -+ if (explicit_group) -+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi) -+ add_group_suffix (group.groups[gi], pi); -+ else -+ add_group_suffix (GROUP_none, pi); - } - - /* Register all the functions in GROUP. 
*/ -@@ -1213,29 +1238,34 @@ function_resolver::report_no_such_form (type_suffix_index type) - } - - /* Silently check whether there is an instance of the function with the -- mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1. -- Return its function decl if so, otherwise return null. */ -+ mode suffix given by MODE, the type suffixes given by TYPE0 and TYPE1, -+ and the group suffix given by GROUP. Return its function decl if so, -+ otherwise return null. */ - tree - function_resolver::lookup_form (mode_suffix_index mode, - type_suffix_index type0, -- type_suffix_index type1) -+ type_suffix_index type1, -+ group_suffix_index group) - { - type_suffix_pair types = { type0, type1 }; -- function_instance instance (base_name, base, shape, mode, types, pred); -+ function_instance instance (base_name, base, shape, mode, types, -+ group, pred); - registered_function *rfn - = function_table->find_with_hash (instance, instance.hash ()); - return rfn ? rfn->decl : NULL_TREE; - } - --/* Resolve the function to one with the mode suffix given by MODE and the -- type suffixes given by TYPE0 and TYPE1. Return its function decl on -- success, otherwise report an error and return error_mark_node. */ -+/* Resolve the function to one with the mode suffix given by MODE, the -+ type suffixes given by TYPE0 and TYPE1, and group suffix given by -+ GROUP. Return its function decl on success, otherwise report an -+ error and return error_mark_node. 
*/ - tree - function_resolver::resolve_to (mode_suffix_index mode, - type_suffix_index type0, -- type_suffix_index type1) -+ type_suffix_index type1, -+ group_suffix_index group) - { -- tree res = lookup_form (mode, type0, type1); -+ tree res = lookup_form (mode, type0, type1, group); - if (!res) - { - if (type1 == NUM_TYPE_SUFFIXES) -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def -index 6e4dcdbc9..d9bf9c350 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins.def -@@ -29,6 +29,10 @@ - #define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) - #endif - -+#ifndef DEF_SVE_GROUP_SUFFIX -+#define DEF_SVE_GROUP_SUFFIX(A, B, C) -+#endif -+ - #ifndef DEF_SVE_FUNCTION - #define DEF_SVE_FUNCTION(A, B, C, D) - #endif -@@ -95,10 +99,15 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) - DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) - DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) - -+DEF_SVE_GROUP_SUFFIX (x2, 0, 2) -+DEF_SVE_GROUP_SUFFIX (x3, 0, 3) -+DEF_SVE_GROUP_SUFFIX (x4, 0, 4) -+ - #include "aarch64-sve-builtins-base.def" - #include "aarch64-sve-builtins-sve2.def" - - #undef DEF_SVE_FUNCTION -+#undef DEF_SVE_GROUP_SUFFIX - #undef DEF_SVE_TYPE_SUFFIX - #undef DEF_SVE_TYPE - #undef DEF_SVE_MODE -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 824c31cd7..374c57e93 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -180,6 +180,17 @@ enum type_suffix_index - NUM_TYPE_SUFFIXES - }; - -+/* Enumerates the possible group suffixes. Each suffix combines two -+ optional pieces of information: the vector group size in a ZA index, -+ and the number of vectors in the largest tuple argument. 
*/ -+enum group_suffix_index -+{ -+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) GROUP_##NAME, -+#include "aarch64-sve-builtins.def" -+ GROUP_none, -+ NUM_GROUP_SUFFIXES -+}; -+ - /* Combines two type suffixes. */ - typedef enum type_suffix_index type_suffix_pair[2]; - -@@ -237,6 +248,21 @@ struct type_suffix_info - machine_mode vector_mode : 16; - }; - -+/* Static information about a group suffix. */ -+struct group_suffix_info -+{ -+ /* The suffix string itself. */ -+ const char *string; -+ -+ /* If the suffix describes a vector group in a ZA index, this is the -+ size of that group, otherwise it is zero. */ -+ unsigned int vg; -+ -+ /* The number of vectors in the largest (or only) tuple argument, -+ or 1 if the suffix does not convey this information. */ -+ unsigned int vectors_per_tuple; -+}; -+ - /* Static information about a set of functions. */ - struct function_group_info - { -@@ -251,14 +277,16 @@ struct function_group_info - shapes. */ - const function_shape *const *shape; - -- /* A list of the available type suffixes, and of the available predication -- types. The function supports every combination of the two. -+ /* A list of the available type suffixes, group suffixes, and predication -+ types. The function supports every combination of the three. -+ -+ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES. -+ It is lexicographically ordered based on the index value. - -- The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES -- while the list of predication types is terminated by NUM_PREDS. -- The list of type suffixes is lexicographically ordered based -- on the index value. */ -+ The list of group suffixes is terminated by NUM_GROUP_SUFFIXES -+ and the list of predication types is terminated by NUM_PREDS. 
*/ - const type_suffix_pair *types; -+ const group_suffix_index *groups; - const predication_index *preds; - - /* The architecture extensions that the functions require, as a set of -@@ -273,7 +301,8 @@ class GTY((user)) function_instance - public: - function_instance (const char *, const function_base *, - const function_shape *, mode_suffix_index, -- const type_suffix_pair &, predication_index); -+ const type_suffix_pair &, group_suffix_index, -+ predication_index); - - bool operator== (const function_instance &) const; - bool operator!= (const function_instance &) const; -@@ -294,6 +323,8 @@ public: - units_index displacement_units () const; - - const type_suffix_info &type_suffix (unsigned int) const; -+ const group_suffix_info &group_suffix () const; -+ - tree scalar_type (unsigned int) const; - tree vector_type (unsigned int) const; - tree tuple_type (unsigned int) const; -@@ -301,14 +332,14 @@ public: - machine_mode vector_mode (unsigned int) const; - machine_mode gp_mode (unsigned int) const; - -- /* The properties of the function. (The explicit "enum"s are required -- for gengtype.) */ -+ /* The properties of the function. 
*/ - const char *base_name; - const function_base *base; - const function_shape *shape; -- enum mode_suffix_index mode_suffix_id; -+ mode_suffix_index mode_suffix_id; - type_suffix_pair type_suffix_ids; -- enum predication_index pred; -+ group_suffix_index group_suffix_id; -+ predication_index pred; - }; - - class registered_function; -@@ -390,10 +421,12 @@ public: - tree report_no_such_form (type_suffix_index); - tree lookup_form (mode_suffix_index, - type_suffix_index = NUM_TYPE_SUFFIXES, -- type_suffix_index = NUM_TYPE_SUFFIXES); -+ type_suffix_index = NUM_TYPE_SUFFIXES, -+ group_suffix_index = GROUP_none); - tree resolve_to (mode_suffix_index, - type_suffix_index = NUM_TYPE_SUFFIXES, -- type_suffix_index = NUM_TYPE_SUFFIXES); -+ type_suffix_index = NUM_TYPE_SUFFIXES, -+ group_suffix_index = GROUP_none); - - type_suffix_index infer_integer_scalar_type (unsigned int); - type_suffix_index infer_pointer_type (unsigned int, bool = false); -@@ -641,6 +674,11 @@ class function_shape - public: - virtual bool explicit_type_suffix_p (unsigned int) const = 0; - -+ /* True if the group suffix is present in overloaded names. -+ This isn't meaningful for pre-SME intrinsics, and true is -+ more common than false, so provide a default definition. */ -+ virtual bool explicit_group_suffix_p () const { return true; } -+ - /* Define all functions associated with the given group. 
*/ - virtual void build (function_builder &, - const function_group_info &) const = 0; -@@ -669,6 +707,7 @@ private: - - extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; - extern const mode_suffix_info mode_suffixes[MODE_none + 1]; -+extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES]; - - extern tree scalar_types[NUM_VECTOR_TYPES]; - extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; -@@ -728,9 +767,11 @@ function_instance (const char *base_name_in, - const function_shape *shape_in, - mode_suffix_index mode_suffix_id_in, - const type_suffix_pair &type_suffix_ids_in, -+ group_suffix_index group_suffix_id_in, - predication_index pred_in) - : base_name (base_name_in), base (base_in), shape (shape_in), -- mode_suffix_id (mode_suffix_id_in), pred (pred_in) -+ mode_suffix_id (mode_suffix_id_in), group_suffix_id (group_suffix_id_in), -+ pred (pred_in) - { - memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids)); - } -@@ -741,9 +782,10 @@ function_instance::operator== (const function_instance &other) const - return (base == other.base - && shape == other.shape - && mode_suffix_id == other.mode_suffix_id -- && pred == other.pred - && type_suffix_ids[0] == other.type_suffix_ids[0] -- && type_suffix_ids[1] == other.type_suffix_ids[1]); -+ && type_suffix_ids[1] == other.type_suffix_ids[1] -+ && group_suffix_id == other.group_suffix_id -+ && pred == other.pred); - } - - inline bool -@@ -815,6 +857,13 @@ function_instance::type_suffix (unsigned int i) const - return type_suffixes[type_suffix_ids[i]]; - } - -+/* Return information about the function's group suffix. */ -+inline const group_suffix_info & -+function_instance::group_suffix () const -+{ -+ return group_suffixes[group_suffix_id]; -+} -+ - /* Return the scalar type associated with type suffix I. 
*/ - inline tree - function_instance::scalar_type (unsigned int i) const --- -2.33.0 - diff --git a/0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch b/0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch deleted file mode 100644 index 5e9a7eb..0000000 --- a/0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch +++ /dev/null @@ -1,230 +0,0 @@ -From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:20 +0000 -Subject: [PATCH 066/157] [Backport][SME] aarch64: Add sve_type to SVE builtins - code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92 - -Until now, the SVE ACLE code had mostly been able to represent -individual SVE arguments with just an element type suffix (s32, u32, -etc.). However, the SME2 ACLE provides many overloaded intrinsics -that operate on tuples rather than single vectors. This patch -therefore adds a new type (sve_type) that combines an element -type suffix with a vector count. This is enough to uniquely -represent all SVE ACLE types. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h (sve_type): New struct. - (sve_type::operator==): New function. - (function_resolver::get_vector_type): Delete. - (function_resolver::report_no_such_form): Take an sve_type rather - than a type_suffix_index. - * config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New - function. - (function_resolver::get_vector_type): Delete. - (function_resolver::report_no_such_form): Take an sve_type rather - than a type_suffix_index. - (find_sve_type): New function, split out from... - (function_resolver::infer_vector_or_tuple_type): ...here. 
---- - gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++---------- - gcc/config/aarch64/aarch64-sve-builtins.h | 37 ++++++++- - 2 files changed, 88 insertions(+), 42 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index dc3fd80da..cc676bfe1 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type) - return NUM_TYPE_SUFFIXES; - } - -+/* Return the vector type associated with TYPE. */ -+static tree -+get_vector_type (sve_type type) -+{ -+ auto vector_type = type_suffixes[type.type].vector_type; -+ return acle_vector_types[type.num_vectors - 1][vector_type]; -+} -+ - /* Report an error against LOCATION that the user has tried to use - function FNDECL when extension EXTENSION is disabled. */ - static void -@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location, - { - } - --/* Return the vector type associated with type suffix TYPE. */ --tree --function_resolver::get_vector_type (type_suffix_index type) --{ -- return acle_vector_types[0][type_suffixes[type].vector_type]; --} -- - /* Return the name associated with TYPE. Using the - name should be more user-friendly than the underlying canonical type, - since it makes the signedness and bitwidth explicit. */ -@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i) - || SCALAR_FLOAT_TYPE_P (type)); - } - --/* Report that the function has no form that takes type suffix TYPE. -+/* Report that the function has no form that takes type TYPE. - Return error_mark_node. 
*/ - tree --function_resolver::report_no_such_form (type_suffix_index type) -+function_resolver::report_no_such_form (sve_type type) - { - error_at (location, "%qE has no form that takes %qT arguments", - fndecl, get_vector_type (type)); -@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno, - return type; - } - -+/* If TYPE is an SVE predicate or vector type, or a tuple of such a type, -+ return the associated sve_type, otherwise return an invalid sve_type. */ -+static sve_type -+find_sve_type (const_tree type) -+{ -+ /* A linear search should be OK here, since the code isn't hot and -+ the number of types is only small. */ -+ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) -+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) -+ { -+ vector_type_index type_i = type_suffixes[suffix_i].vector_type; -+ tree this_type = acle_vector_types[size_i][type_i]; -+ if (this_type && matches_type_p (this_type, type)) -+ return { type_suffix_index (suffix_i), size_i + 1 }; -+ } -+ -+ return {}; -+} -+ - /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS - vectors; NUM_VECTORS is 1 for the former. Return the associated type - suffix on success, using TYPE_SUFFIX_b for predicates. Report an error -@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno, - if (actual == error_mark_node) - return NUM_TYPE_SUFFIXES; - -- /* A linear search should be OK here, since the code isn't hot and -- the number of types is only small. 
*/ -- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) -- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) -- { -- vector_type_index type_i = type_suffixes[suffix_i].vector_type; -- tree type = acle_vector_types[size_i][type_i]; -- if (type && matches_type_p (type, actual)) -- { -- if (size_i + 1 == num_vectors) -- return type_suffix_index (suffix_i); -- -- if (num_vectors == 1) -- error_at (location, "passing %qT to argument %d of %qE, which" -- " expects a single SVE vector rather than a tuple", -- actual, argno + 1, fndecl); -- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) -- /* num_vectors is always != 1, so the singular isn't needed. */ -- error_n (location, num_vectors, "%qT%d%qE%d", -- "passing single vector %qT to argument %d" -- " of %qE, which expects a tuple of %d vectors", -- actual, argno + 1, fndecl, num_vectors); -- else -- /* num_vectors is always != 1, so the singular isn't needed. */ -- error_n (location, num_vectors, "%qT%d%qE%d", -- "passing %qT to argument %d of %qE, which" -- " expects a tuple of %d vectors", actual, argno + 1, -- fndecl, num_vectors); -- return NUM_TYPE_SUFFIXES; -- } -- } -+ if (auto sve_type = find_sve_type (actual)) -+ { -+ if (sve_type.num_vectors == num_vectors) -+ return sve_type.type; -+ -+ if (num_vectors == 1) -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a single SVE vector rather than a tuple", -+ actual, argno + 1, fndecl); -+ else if (sve_type.num_vectors == 1 -+ && sve_type.type != TYPE_SUFFIX_b) -+ /* num_vectors is always != 1, so the singular isn't needed. */ -+ error_n (location, num_vectors, "%qT%d%qE%d", -+ "passing single vector %qT to argument %d" -+ " of %qE, which expects a tuple of %d vectors", -+ actual, argno + 1, fndecl, num_vectors); -+ else -+ /* num_vectors is always != 1, so the singular isn't needed. 
*/ -+ error_n (location, num_vectors, "%qT%d%qE%d", -+ "passing %qT to argument %d of %qE, which" -+ " expects a tuple of %d vectors", actual, argno + 1, -+ fndecl, num_vectors); -+ return NUM_TYPE_SUFFIXES; -+ } - - if (num_vectors == 1) - error_at (location, "passing %qT to argument %d of %qE, which" -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 374c57e93..f4f2c415f 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -263,6 +263,40 @@ struct group_suffix_info - unsigned int vectors_per_tuple; - }; - -+/* Represents an SVE vector, predicate, tuple of vectors, or tuple of -+ predicates. There is also a representation of "no type"/"invalid type". */ -+struct sve_type -+{ -+ sve_type () = default; -+ sve_type (type_suffix_index type) : type (type), num_vectors (1) {} -+ sve_type (type_suffix_index type, unsigned int num_vectors) -+ : type (type), num_vectors (num_vectors) {} -+ -+ /* Return true if the type is valid. */ -+ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; } -+ -+ bool operator== (const sve_type &) const; -+ bool operator!= (const sve_type &x) const { return !operator== (x); } -+ -+ /* This is one of: -+ -+ - TYPE_SUFFIX_b for svbool_t-based types -+ - TYPE_SUFFIX_c for svcount_t-based types -+ - the type suffix of a data element for SVE data vectors and tuples -+ - NUM_TYPE_SUFFIXES for invalid types. */ -+ type_suffix_index type = NUM_TYPE_SUFFIXES; -+ -+ /* If the type is a tuple, this is the number of vectors in the tuple, -+ otherwise it is 1. */ -+ unsigned int num_vectors = 1; -+}; -+ -+inline bool -+sve_type::operator== (const sve_type &other) const -+{ -+ return type == other.type && num_vectors == other.num_vectors; -+} -+ - /* Static information about a set of functions. 
*/ - struct function_group_info - { -@@ -413,12 +447,11 @@ public: - function_resolver (location_t, const function_instance &, tree, - vec &); - -- tree get_vector_type (type_suffix_index); - const char *get_scalar_type_name (type_suffix_index); - tree get_argument_type (unsigned int); - bool scalar_argument_p (unsigned int); - -- tree report_no_such_form (type_suffix_index); -+ tree report_no_such_form (sve_type); - tree lookup_form (mode_suffix_index, - type_suffix_index = NUM_TYPE_SUFFIXES, - type_suffix_index = NUM_TYPE_SUFFIXES, --- -2.33.0 - diff --git a/0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch b/0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch deleted file mode 100644 index 5c7889c..0000000 --- a/0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch +++ /dev/null @@ -1,1474 +0,0 @@ -From 21839879d5f00db48cdacd472044a9bd4e23a2c6 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:20 +0000 -Subject: [PATCH 067/157] [Backport][SME] aarch64: Generalise some SVE ACLE - error messages - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb01ef94ff5096b907639aa3a1d77850921e7d37 - -The current SVE ACLE function-resolution diagnostics assume -that a function has a fixed choice between vectors or tuples -of vectors. If an argument was not an SVE type at all, the -error message said the function "expects an SVE vector type" -or "expects an SVE tuple type". - -This patch generalises the error to cope with cases where -an argument can be either a vector or a tuple. It also splits -out the diagnostics for mismatched tuple sizes, so that they -can be reused by later patches. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (function_resolver::infer_sve_type): New member function. - (function_resolver::report_incorrect_num_vectors): Likewise. - * config/aarch64/aarch64-sve-builtins.cc - (function_resolver::infer_sve_type): New function,. 
- (function_resolver::report_incorrect_num_vectors): New function, - split out from... - (function_resolver::infer_vector_or_tuple_type): ...here. Use - infer_sve_type. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/general-c/*: Update expected error - messages. ---- - gcc/config/aarch64/aarch64-sve-builtins.cc | 87 ++++++++++++------- - gcc/config/aarch64/aarch64-sve-builtins.h | 3 + - .../aarch64/sve/acle/general-c/adr_index_1.c | 6 +- - .../aarch64/sve/acle/general-c/adr_offset_1.c | 6 +- - .../aarch64/sve/acle/general-c/binary_1.c | 2 +- - .../sve/acle/general-c/binary_int_opt_n.c | 2 +- - .../sve/acle/general-c/binary_lane_1.c | 4 +- - .../sve/acle/general-c/binary_long_lane_1.c | 4 +- - .../sve/acle/general-c/binary_long_opt_n_1.c | 2 +- - .../aarch64/sve/acle/general-c/binary_n_1.c | 2 +- - .../acle/general-c/binary_narrowb_opt_n_1.c | 2 +- - .../acle/general-c/binary_narrowt_opt_n_1.c | 4 +- - .../sve/acle/general-c/binary_opt_n_2.c | 2 +- - .../sve/acle/general-c/binary_opt_n_3.c | 2 +- - .../sve/acle/general-c/binary_rotate_1.c | 4 +- - .../sve/acle/general-c/binary_to_uint_1.c | 4 +- - .../sve/acle/general-c/binary_uint64_n_1.c | 2 +- - .../acle/general-c/binary_uint64_opt_n_2.c | 2 +- - .../sve/acle/general-c/binary_uint_1.c | 2 +- - .../sve/acle/general-c/binary_uint_n_1.c | 2 +- - .../sve/acle/general-c/binary_uint_opt_n_1.c | 2 +- - .../sve/acle/general-c/binary_wide_1.c | 8 +- - .../sve/acle/general-c/binary_wide_opt_n_1.c | 4 +- - .../aarch64/sve/acle/general-c/clast_1.c | 4 +- - .../aarch64/sve/acle/general-c/compare_1.c | 4 +- - .../sve/acle/general-c/compare_opt_n_1.c | 2 +- - .../sve/acle/general-c/compare_wide_opt_n_1.c | 2 +- - .../sve/acle/general-c/count_vector_1.c | 2 +- - .../aarch64/sve/acle/general-c/create_1.c | 4 +- - .../aarch64/sve/acle/general-c/create_3.c | 4 +- - .../aarch64/sve/acle/general-c/create_5.c | 4 +- - .../aarch64/sve/acle/general-c/fold_left_1.c | 4 +- - .../sve/acle/general-c/inc_dec_pred_1.c | 2 +- - 
.../aarch64/sve/acle/general-c/mmla_1.c | 10 +-- - .../acle/general-c/prefetch_gather_offset_2.c | 2 +- - .../aarch64/sve/acle/general-c/reduction_1.c | 2 +- - .../sve/acle/general-c/reduction_wide_1.c | 2 +- - .../general-c/shift_right_imm_narrowb_1.c | 2 +- - .../shift_right_imm_narrowb_to_uint_1.c | 2 +- - .../general-c/shift_right_imm_narrowt_1.c | 4 +- - .../shift_right_imm_narrowt_to_uint_1.c | 4 +- - .../aarch64/sve/acle/general-c/store_1.c | 2 +- - .../aarch64/sve/acle/general-c/store_2.c | 2 +- - .../acle/general-c/store_scatter_offset_1.c | 4 +- - .../sve/acle/general-c/ternary_bfloat16_1.c | 2 +- - .../acle/general-c/ternary_bfloat16_lane_1.c | 2 +- - .../general-c/ternary_bfloat16_lanex2_1.c | 2 +- - .../acle/general-c/ternary_bfloat16_opt_n_1.c | 2 +- - .../general-c/ternary_intq_uintq_lane_1.c | 6 +- - .../general-c/ternary_intq_uintq_opt_n_1.c | 4 +- - .../sve/acle/general-c/ternary_lane_1.c | 6 +- - .../acle/general-c/ternary_lane_rotate_1.c | 6 +- - .../sve/acle/general-c/ternary_long_lane_1.c | 6 +- - .../sve/acle/general-c/ternary_long_opt_n_1.c | 4 +- - .../sve/acle/general-c/ternary_opt_n_1.c | 4 +- - .../sve/acle/general-c/ternary_qq_lane_1.c | 6 +- - .../acle/general-c/ternary_qq_lane_rotate_1.c | 6 +- - .../sve/acle/general-c/ternary_qq_opt_n_2.c | 4 +- - .../sve/acle/general-c/ternary_qq_rotate_1.c | 6 +- - .../sve/acle/general-c/ternary_rotate_1.c | 6 +- - .../general-c/ternary_shift_right_imm_1.c | 4 +- - .../sve/acle/general-c/ternary_uint_1.c | 6 +- - .../sve/acle/general-c/ternary_uintq_intq_1.c | 6 +- - .../general-c/ternary_uintq_intq_lane_1.c | 6 +- - .../general-c/ternary_uintq_intq_opt_n_1.c | 4 +- - .../aarch64/sve/acle/general-c/tmad_1.c | 4 +- - .../aarch64/sve/acle/general-c/unary_1.c | 2 +- - .../aarch64/sve/acle/general-c/unary_2.c | 2 +- - .../sve/acle/general-c/unary_convert_1.c | 2 +- - .../sve/acle/general-c/unary_convert_2.c | 2 +- - .../acle/general-c/unary_convert_narrowt_1.c | 2 +- - 
.../sve/acle/general-c/unary_narrowb_1.c | 2 +- - .../acle/general-c/unary_narrowb_to_uint_1.c | 2 +- - .../sve/acle/general-c/unary_narrowt_1.c | 4 +- - .../acle/general-c/unary_narrowt_to_uint_1.c | 4 +- - .../sve/acle/general-c/unary_to_int_1.c | 2 +- - .../sve/acle/general-c/unary_to_uint_1.c | 2 +- - .../sve/acle/general-c/unary_to_uint_2.c | 2 +- - .../sve/acle/general-c/unary_to_uint_3.c | 2 +- - .../aarch64/sve/acle/general-c/unary_uint_1.c | 2 +- - .../sve/acle/general-c/unary_widen_1.c | 4 +- - 81 files changed, 195 insertions(+), 169 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index cc676bfe1..4e94e3633 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -1228,6 +1228,32 @@ function_resolver::scalar_argument_p (unsigned int i) - || SCALAR_FLOAT_TYPE_P (type)); - } - -+/* Report that argument ARGNO was expected to have NUM_VECTORS vectors. -+ TYPE is the type that ARGNO actually has. */ -+void -+function_resolver::report_incorrect_num_vectors (unsigned int argno, -+ sve_type type, -+ unsigned int num_vectors) -+{ -+ if (num_vectors == 1) -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a single SVE vector rather than a tuple", -+ get_vector_type (type), argno + 1, fndecl); -+ else if (type.num_vectors == 1 -+ && type.type != TYPE_SUFFIX_b) -+ /* num_vectors is always != 1, so the singular isn't needed. */ -+ error_n (location, num_vectors, "%qT%d%qE%d", -+ "passing single vector %qT to argument %d" -+ " of %qE, which expects a tuple of %d vectors", -+ get_vector_type (type), argno + 1, fndecl, num_vectors); -+ else -+ /* num_vectors is always != 1, so the singular isn't needed. 
*/ -+ error_n (location, num_vectors, "%qT%d%qE%d", -+ "passing %qT to argument %d of %qE, which" -+ " expects a tuple of %d vectors", get_vector_type (type), -+ argno + 1, fndecl, num_vectors); -+} -+ - /* Report that the function has no form that takes type TYPE. - Return error_mark_node. */ - tree -@@ -1372,6 +1398,30 @@ find_sve_type (const_tree type) - return {}; - } - -+/* Require argument ARGNO to be an SVE type (i.e. something that can be -+ represented by sve_type). Return the (valid) type if it is, otherwise -+ report an error and return an invalid type. */ -+sve_type -+function_resolver::infer_sve_type (unsigned int argno) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return {}; -+ -+ if (sve_type type = find_sve_type (actual)) -+ return type; -+ -+ if (scalar_argument_p (argno)) -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects an SVE type rather than a scalar type", -+ actual, argno + 1, fndecl); -+ else -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects an SVE type", -+ actual, argno + 1, fndecl); -+ return {}; -+} -+ - /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS - vectors; NUM_VECTORS is 1 for the former. Return the associated type - suffix on success, using TYPE_SUFFIX_b for predicates. 
Report an error -@@ -1380,41 +1430,14 @@ type_suffix_index - function_resolver::infer_vector_or_tuple_type (unsigned int argno, - unsigned int num_vectors) - { -- tree actual = get_argument_type (argno); -- if (actual == error_mark_node) -+ auto type = infer_sve_type (argno); -+ if (!type) - return NUM_TYPE_SUFFIXES; - -- if (auto sve_type = find_sve_type (actual)) -- { -- if (sve_type.num_vectors == num_vectors) -- return sve_type.type; -- -- if (num_vectors == 1) -- error_at (location, "passing %qT to argument %d of %qE, which" -- " expects a single SVE vector rather than a tuple", -- actual, argno + 1, fndecl); -- else if (sve_type.num_vectors == 1 -- && sve_type.type != TYPE_SUFFIX_b) -- /* num_vectors is always != 1, so the singular isn't needed. */ -- error_n (location, num_vectors, "%qT%d%qE%d", -- "passing single vector %qT to argument %d" -- " of %qE, which expects a tuple of %d vectors", -- actual, argno + 1, fndecl, num_vectors); -- else -- /* num_vectors is always != 1, so the singular isn't needed. 
*/ -- error_n (location, num_vectors, "%qT%d%qE%d", -- "passing %qT to argument %d of %qE, which" -- " expects a tuple of %d vectors", actual, argno + 1, -- fndecl, num_vectors); -- return NUM_TYPE_SUFFIXES; -- } -+ if (type.num_vectors == num_vectors) -+ return type.type; - -- if (num_vectors == 1) -- error_at (location, "passing %qT to argument %d of %qE, which" -- " expects an SVE vector type", actual, argno + 1, fndecl); -- else -- error_at (location, "passing %qT to argument %d of %qE, which" -- " expects an SVE tuple type", actual, argno + 1, fndecl); -+ report_incorrect_num_vectors (argno, type, num_vectors); - return NUM_TYPE_SUFFIXES; - } - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index f4f2c415f..5a4f35123 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -451,6 +451,8 @@ public: - tree get_argument_type (unsigned int); - bool scalar_argument_p (unsigned int); - -+ void report_incorrect_num_vectors (unsigned int, sve_type, unsigned int); -+ - tree report_no_such_form (sve_type); - tree lookup_form (mode_suffix_index, - type_suffix_index = NUM_TYPE_SUFFIXES, -@@ -463,6 +465,7 @@ public: - - type_suffix_index infer_integer_scalar_type (unsigned int); - type_suffix_index infer_pointer_type (unsigned int, bool = false); -+ sve_type infer_sve_type (unsigned int); - type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); - type_suffix_index infer_vector_type (unsigned int); - type_suffix_index infer_integer_vector_type (unsigned int); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c -index 714265ed1..a17e99f5d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c -@@ -10,14 +10,14 @@ f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t 
u8, svuint16_t u16, - { - svadrh_index (u32); /* { dg-error {too few arguments to function 'svadrh_index'} } */ - svadrh_index (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrh_index'} } */ -- svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ -- svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ -+ svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE type} } */ -+ svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE type rather than a scalar} } */ - svadrh_index (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrh_index (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrh_index (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrh_index (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ - -- svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects an SVE vector type} } */ -+ svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects an SVE type rather than a scalar} } */ - svadrh_index (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ - svadrh_index (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ - svadrh_index (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 
'svadrh_index', which expects a vector of integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c -index 528d7ac51..627ae8ac5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c -@@ -10,14 +10,14 @@ f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, - { - svadrb_offset (u32); /* { dg-error {too few arguments to function 'svadrb_offset'} } */ - svadrb_offset (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrb_offset'} } */ -- svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ -- svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ -+ svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE type} } */ -+ svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE type rather than a scalar} } */ - svadrb_offset (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrb_offset (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrb_offset (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ - svadrb_offset (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ - -- svadrb_offset (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrb_offset', which expects an SVE vector type} } */ -+ svadrb_offset (u32, 0); /* { dg-error {passing 'int' 
to argument 2 of 'svadrb_offset', which expects an SVE type rather than a scalar} } */ - svadrb_offset (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ - svadrb_offset (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ - svadrb_offset (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -index 8ce89fa10..4343146de 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -@@ -10,5 +10,5 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16) - svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ - svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ - svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ -- svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE vector type} } */ -+ svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c -index 965e9a13c..9902379f6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c -@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, - svscale_x 
(s32, f16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ - svscale_x (1, f16, s32); /* { dg-error {passing 'int' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ - svscale_x (pg, pg, s16); /* { dg-error {'svscale_x' has no form that takes 'svbool_t' arguments} } */ -- svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE vector type} } */ -+ svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE type rather than a scalar} } */ - svscale_x (pg, f16, s16); - svscale_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ - svscale_x (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -index 3913ff63d..10b6b7e81 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -@@ -10,8 +10,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svmul_lane (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmul_lane'} } */ - svmul_lane (pg, pg, 0); /* { dg-error {'svmul_lane' has no form that takes 'svbool_t' arguments} } */ - svmul_lane (s32, s32, 0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */ -- svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE vector type} } */ -- svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE vector type} } */ -+ svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 
'svmul_lane', which expects an SVE type rather than a scalar} } */ -+ svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ - svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ - svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ - svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -index bfe78088b..805863f76 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -@@ -19,8 +19,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - svmullb_lane (f16, f16, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat16_t' arguments} } */ - svmullb_lane (f32, f32, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat32_t' arguments} } */ - svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */ -- svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE vector type} } */ -- svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE vector type} } */ -+ svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ -+ svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ - svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to 
argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */ - svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ - svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -index 27893c6fb..ee704eeae 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -@@ -23,7 +23,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddlb (u64, u64); /* { dg-error {'svaddlb' has no form that takes 'svuint64_t' arguments} } */ - svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */ - svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */ -- svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE vector type} } */ -+ svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */ - svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ - svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ - svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c -index 0c69e66a1..ff4f0ff75 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c -@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, svfloat16_t f16, int i, float f) - { - svinsr (u8); /* { dg-error {too few arguments to function 'svinsr'} } */ - svinsr (u8, 0, 0); /* { dg-error {too many arguments to function 'svinsr'} } */ -- svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE vector type} } */ -+ svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE type rather than a scalar} } */ - svinsr (u8, 0); - svinsr (u8, -1); - svinsr (u8, i); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -index 920cbd1b0..8ca549ba9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -@@ -23,7 +23,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddhnb (u64, u64); - svaddhnb (s64, s64); - svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */ -- svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE vector type} } */ -+ svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */ - svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ - svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ - svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -index eb70d058e..2b537965b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -@@ -26,8 +26,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddhnt (u32, u64, u64); - svaddhnt (s32, s64, s64); - svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */ -- svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE vector type} } */ -- svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE vector type} } */ -+ svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ -+ svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ - svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ - svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ - svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -index 9fa83ca99..a151f90d1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svadd_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svadd_x'} } */ - svadd_x (u8, u8, u8); /* { dg-error {passing 
'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ - svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ -- svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE vector type} } */ -+ svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */ - svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ - svadd_x (pg, u8, u8); - svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -index 4d0b253e3..70ec9c585 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svand_z (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svand_z'} } */ - svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ - svand_z (pg, pg, pg); -- svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE vector type} } */ -+ svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */ - svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ - svand_z (pg, u8, u8); - svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -index 8ffe91bce..7669e4a02 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -@@ -10,8 +10,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svcadd_x (f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcadd_x', which expects 'svbool_t'} } */ - svcadd_x (pg, pg, pg, 90); /* { dg-error {'svcadd_x' has no form that takes 'svbool_t' arguments} } */ - svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ -- svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE vector type} } */ -- svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE vector type} } */ -+ svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ -+ svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ - svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ - svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ - svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -index 213defc66..154662487 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -@@ -11,9 +11,9 @@ f1 (svbool_t pg, 
svint32_t s32, svuint32_t u32) - svhistcnt_z (pg, s32, s32, 0); /* { dg-error {too many arguments to function 'svhistcnt_z'} } */ - svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ - svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ -- svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE vector type} } */ -+ svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ - svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */ - svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */ -- svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE vector type} } */ -+ svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ - svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c -index c8ca5f746..207552a3b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c -@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, int i, float f) - { - svdupq_lane (u8); /* { dg-error {too few arguments to function 'svdupq_lane'} } */ - svdupq_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdupq_lane'} } */ -- svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 
'svdupq_lane', which expects an SVE vector type} } */ -+ svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdupq_lane', which expects an SVE type rather than a scalar} } */ - svdupq_lane (u8, 0); - svdupq_lane (u8, -1); - svdupq_lane (u8, i); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c -index be217394f..c661a66f3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c -@@ -8,7 +8,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint64_t u64) - svlsl_wide_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_x'} } */ - svlsl_wide_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svlsl_wide_x'} } */ - svlsl_wide_x (u8, u8, u64); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svlsl_wide_x', which expects 'svbool_t'} } */ -- svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE vector type} } */ -+ svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE type rather than a scalar} } */ - svlsl_wide_x (pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svlsl_wide_x', which expects 'svuint64_t'} } */ - svlsl_wide_x (pg, u64, u64); /* { dg-error {'svlsl_wide_x' has no form that takes 'svuint64_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c -index 8f86c50b6..8493d5d68 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c -@@ -11,7 +11,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, - svtbl (pg, pg); /* { dg-error 
{passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ - svtbl (pg, u8); /* { dg-error {'svtbl' has no form that takes 'svbool_t' arguments} } */ - -- svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE vector type} } */ -+ svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE type rather than a scalar} } */ - svtbl (u8, u8); - svtbl (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ - svtbl (u8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c -index 36a902e69..d74cb46f7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c -@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint8_t u8, int i, float f) - { - svdup_lane (u8); /* { dg-error {too few arguments to function 'svdup_lane'} } */ - svdup_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdup_lane'} } */ -- svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE vector type} } */ -+ svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE type rather than a scalar} } */ - svdup_lane (u8, 0); - svdup_lane (u8, -1); - svdup_lane (u8, i); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c -index b162ab405..f44d7a9fa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c -@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, - svlsl_x (s32, s32, u32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ - svlsl_x (1, s32, u32); /* { dg-error {passing 'int' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ - svlsl_x (pg, pg, u16); /* { dg-error {'svlsl_x' has no form that takes 'svbool_t' arguments} } */ -- svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE vector type} } */ -+ svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE type rather than a scalar} } */ - svlsl_x (pg, s16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ - svlsl_x (pg, s16, u16); - svlsl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c -index f58ab75d7..ba38361ab 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_1.c -@@ -30,8 +30,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svadalp_m (pg, s16, s8); - svadalp_m (pg, f32, f16); /* { dg-error {'svadalp_m' has no form that takes 'svfloat32_t' arguments} } */ - svadalp_m (pg, f16, f32); /* { dg-error {'svadalp_m' has no form that takes 'svfloat16_t' arguments} } */ -- svadalp_m (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE vector type} } */ -- svadalp_m (pg, 0, u64); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE vector type} } */ -- svadalp_m (pg, u8, 0); /* { dg-error {passing 
'int' to argument 3 of 'svadalp_m', which expects an SVE vector type} } */ -- svadalp_m (pg, u16, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE vector type} } */ -+ svadalp_m (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ -+ svadalp_m (pg, 0, u64); /* { dg-error {passing 'int' to argument 2 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ -+ svadalp_m (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ -+ svadalp_m (pg, u16, 0); /* { dg-error {passing 'int' to argument 3 of 'svadalp_m', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c -index 5a58211a0..fd27d8559 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_wide_opt_n_1.c -@@ -27,8 +27,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddwb (s16, s8); - svaddwb (f32, f16); /* { dg-error {'svaddwb' has no form that takes 'svfloat32_t' arguments} } */ - svaddwb (f16, f32); /* { dg-error {'svaddwb' has no form that takes 'svfloat16_t' arguments} } */ -- svaddwb (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE vector type} } */ -- svaddwb (0, u64); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE vector type} } */ -+ svaddwb (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE type rather than a scalar} } */ -+ svaddwb (0, u64); /* { dg-error {passing 'int' to argument 1 of 'svaddwb', which expects an SVE type rather than a scalar} } */ - svaddwb (u8, 0); /* { dg-error {'svaddwb' has no form that takes 'svuint8_t' arguments} } */ - 
svaddwb (u16, 0); - svaddwb (u32, 0); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -index cb9ac946c..ba1b2520f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -@@ -6,10 +6,10 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i) - svclasta (pg, 1); /* { dg-error {too few arguments to function 'svclasta'} } */ - svclasta (pg, 1, s32, 1); /* { dg-error {too many arguments to function 'svclasta'} } */ - svclasta (1, 1, s32); /* { dg-error {passing 'int' to argument 1 of 'svclasta', which expects 'svbool_t'} } */ -- svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ -+ svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ - svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ - svclasta (pg, i, s32); -- svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ -+ svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ - svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */ - svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -index 12511a85b..5474124cc 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -@@ -12,14 +12,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svmatch (pg, 
u8, u8, u8); /* { dg-error {too many arguments to function 'svmatch'} } */ - svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */ - svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */ -- svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE vector type} } */ -+ svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */ - svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ - svmatch (pg, u8, u8); - svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ - svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ - svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ - svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -- svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE vector type} } */ -+ svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */ - - svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ - svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -index 71c8e86d5..6faa73972 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svcmpeq (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svcmpeq'} } */ - svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ - svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ -- svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE vector type} } */ -+ svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */ - svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ - svcmpeq (pg, u8, u8); - svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c -index fc5e45663..655f03360 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c -@@ -9,7 +9,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint64_t s64, svuint64_t u64, - svcmpeq_wide (pg, s8); /* { dg-error {too few arguments to function 'svcmpeq_wide'} } */ - svcmpeq_wide (pg, s8, s64, s8); /* { dg-error {too many arguments to function 'svcmpeq_wide'} } */ - svcmpeq_wide (s8, s8, s64); /* { dg-error {passing 'svint8_t' to argument 1 of 'svcmpeq_wide', which expects 'svbool_t'} } */ -- svcmpeq_wide (pg, 0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE vector type} } */ -+ svcmpeq_wide (pg, 
0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE type rather than a scalar} } */ - svcmpeq_wide (pg, s8, 0); - svcmpeq_wide (pg, s8, x); - svcmpeq_wide (pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c -index daf9e0d5b..b57d9de1d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c -@@ -7,7 +7,7 @@ f1 (svbool_t pg, svuint32_t u32, svuint32x2_t u32x2) - { - svlen (); /* { dg-error {too few arguments to function 'svlen'} } */ - svlen (u32, u32); /* { dg-error {too many arguments to function 'svlen'} } */ -- svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE vector type} } */ -+ svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE type rather than a scalar} } */ - svlen (pg); /* { dg-error {'svlen' has no form that takes 'svbool_t' arguments} } */ - svlen (u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 1 of 'svlen', which expects a single SVE vector rather than a tuple} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -index 31321a046..83e4a5600 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -@@ -12,8 +12,8 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, - *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ - *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 
'svcreate2', but previous arguments had type 'svuint8_t'} } */ - *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ -- *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE vector type} } */ -- *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE vector type} } */ -+ *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */ -+ *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ - *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate2 (u8, u8); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -index a88e56b31..e3302f7e7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -@@ -13,8 +13,8 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, - *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ - *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ - *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ -- *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', 
which expects an SVE vector type} } */ -- *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE vector type} } */ -+ *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */ -+ *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ - *ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate3 (f16, f16, f16); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -index fed124506..c850c94f0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -@@ -14,8 +14,8 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, - *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */ - *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ - *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ -- *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE vector type} } */ -- *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE vector type} } */ -+ *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' 
to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */ -+ *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */ - *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate4 (s32, s32, s32, s32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c -index 1d292786d..181d1b01b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c -@@ -15,7 +15,7 @@ f1 (svbool_t pg, int i, float f, double d, void *ptr, svfloat32_t f32, - svadda (pg, ptr, f32); /* { dg-error {incompatible type for argument 2 of 'svadda_f32'} } */ - svadda (pg, pg, f32); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadda', which expects a scalar element} } */ - svadda (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadda', which expects a scalar element} } */ -- svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE vector type} } */ -+ svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE type rather than a scalar} } */ - svadda (pg, i, i32); /* { dg-error {'svadda' has no form that takes 'svint32_t' arguments} } */ -- svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE vector type} } */ -+ svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c -index a61afcd2d..4de082d01 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c -@@ -7,7 +7,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, - { - svqincp (s32); /* { dg-error {too few arguments to function 'svqincp'} } */ - svqincp (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp'} } */ -- svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE vector type} } */ -+ svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE type rather than a scalar} } */ - svqincp (pg, pg); /* { dg-error {'svqincp' has no form that takes 'svbool_t' arguments} } */ - svqincp (s8, pg); /* { dg-error {'svqincp' has no form that takes 'svint8_t' arguments} } */ - svqincp (u8, pg); /* { dg-error {'svqincp' has no form that takes 'svuint8_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -index 5b0b00e96..7fc7bb67b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -@@ -23,22 +23,22 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, - { - svmmla (s32, s8); /* { dg-error {too few arguments to function 'svmmla'} } */ - svmmla (s32, s8, s8, s8); /* { dg-error {too many arguments to function 'svmmla'} } */ -- svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE type rather than a scalar} } */ - svmmla (pg, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svbool_t' arguments} } */ - svmmla (u8, s8, s8); /* { dg-error {'svmmla' has 
no form that takes 'svuint8_t' arguments} } */ - -- svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE type rather than a scalar} } */ - svmmla (s32, u8, s8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ - svmmla (s32, s8, u8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -- svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE type rather than a scalar} } */ - svmmla (s32, s8, s8); - svmmla (s32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ - svmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ - -- svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE type rather than a scalar} } */ - svmmla (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ - svmmla (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -- svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects 
an SVE vector type} } */ -+ svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE type rather than a scalar} } */ - svmmla (u32, u8, u8); - svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ - svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c -index b74721fad..88e0c35e7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c -@@ -12,7 +12,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svprfb_gather (pg, u32); /* { dg-error {too few arguments to function 'svprfb_gather'} } */ - svprfb_gather (pg, u32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather'} } */ - svprfb_gather (0, u32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather', which expects 'svbool_t'} } */ -- svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE vector type} } */ -+ svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE type rather than a scalar} } */ - - svprfb_gather (pg, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ - svprfb_gather (pg, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c -index ab0ef304a..025795e3d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, - svorv (pg, u32, u32); /* { dg-error {too many arguments to function 'svorv'} } */ - svorv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svorv', which expects 'svbool_t'} } */ - svorv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svorv', which expects 'svbool_t'} } */ -- svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE vector type} } */ -+ svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE type rather than a scalar} } */ - svorv (pg, pg); /* { dg-error {'svorv' has no form that takes 'svbool_t' arguments} } */ - svorv (pg, s32); - svorv (pg, u32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c -index f99a2887b..68bacd0a3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, - svaddv (pg, u32, u32); /* { dg-error {too many arguments to function 'svaddv'} } */ - svaddv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ - svaddv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ -- svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', which expects an SVE vector type} } */ -+ svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', 
which expects an SVE type rather than a scalar} } */ - svaddv (pg, pg); /* { dg-error {'svaddv' has no form that takes 'svbool_t' arguments} } */ - svaddv (pg, s32); - svaddv (pg, u32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c -index 6536679d5..c5942c701 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_1.c -@@ -66,5 +66,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - - svshrnb (f32, 1); /* { dg-error {'svshrnb' has no form that takes 'svfloat32_t' arguments} } */ - -- svshrnb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnb', which expects an SVE vector type} } */ -+ svshrnb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnb', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c -index 51f9388bf..3ecd20a22 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowb_to_uint_1.c -@@ -54,5 +54,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - - svqshrunb (f32, 1); /* { dg-error {'svqshrunb' has no form that takes 'svfloat32_t' arguments} } */ - -- svqshrunb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunb', which expects an SVE vector type} } */ -+ svqshrunb (1, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunb', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c -index 6c31cf8ec..e9d1d1337 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_1.c -@@ -76,6 +76,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - - svshrnt (f32, f32, 1); /* { dg-error {'svshrnt' has no form that takes 'svfloat32_t' arguments} } */ - -- svshrnt (1, s32, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnt', which expects an SVE vector type} } */ -- svshrnt (s32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svshrnt', which expects an SVE vector type} } */ -+ svshrnt (1, s32, 1); /* { dg-error {passing 'int' to argument 1 of 'svshrnt', which expects an SVE type rather than a scalar} } */ -+ svshrnt (s32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svshrnt', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c -index 2e35ad304..741495609 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_narrowt_to_uint_1.c -@@ -59,6 +59,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - - svqshrunt (u16, f32, 1); /* { dg-error {'svqshrunt' has no form that takes 'svfloat32_t' arguments} } */ - -- svqshrunt (1, u32, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunt', which expects an SVE vector type} } */ -- svqshrunt (u32, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svqshrunt', which expects an SVE vector type} } */ -+ svqshrunt (1, u32, 1); /* { dg-error {passing 'int' to argument 1 of 'svqshrunt', which expects an SVE type rather than a scalar} } */ -+ svqshrunt (u32, 1, 1); /* { dg-error {passing 
'int' to argument 2 of 'svqshrunt', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c -index 625f059af..0b2a3e837 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c -@@ -13,7 +13,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, - svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */ - svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */ - svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */ -- svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE vector type} } */ -+ svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE type rather than a scalar} } */ - svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */ - svst1 (pg, 0, s8); - svst1 (pg, (int32_t *) 0, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c -index c718b3ee0..b35e8955f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c -@@ -15,7 +15,7 @@ f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, - svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ - svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ - svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-warning 
"passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */ -- svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE vector type} } */ -+ svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE type rather than a scalar} } */ - svst1_vnum (pg, void_ptr, 0, pg); /* { dg-error {'svst1_vnum' has no form that takes 'svbool_t' arguments} } */ - svst1_vnum (pg, 0, 0, s8); - svst1_vnum (pg, (int32_t *) 0, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c -index 10abf758c..3b3b56222 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c -@@ -13,8 +13,8 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - svst1_scatter (pg, u32); /* { dg-error {too few arguments to function 'svst1_scatter'} } */ - svst1_scatter (pg, u32, u32, 0); /* { dg-error {too many arguments to function 'svst1_scatter'} } */ - svst1_scatter (0, u32, u32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter', which expects 'svbool_t'} } */ -- svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE vector type} } */ -- svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE vector type} } */ -+ svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE type rather than a scalar} } */ -+ svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE type rather than a scalar} } */ - - 
svst1_scatter (pg, u32, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c -index a9233324c..9a554f54f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, - { - svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */ - svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */ -- svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */ -+ svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE type rather than a scalar} } */ - svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */ - svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */ - svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c -index 23f027f2d..87e74fbcf 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, - { - svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */ - svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many 
arguments to function 'svbfmlalb_lane'} } */ -- svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */ -+ svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE type rather than a scalar} } */ - svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */ - svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */ - svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c -index 4755ca79a..ca1852644 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, - { - svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */ - svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */ -- svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */ -+ svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE type rather than a scalar} } */ - svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */ - svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */ - svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 
'svuint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -index 2d09a8eeb..efdfb8955 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, - { - svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */ - svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */ -- svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */ -+ svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE type rather than a scalar} } */ - svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */ - svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */ - svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c -index 600be05a8..934b7bd60 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c -@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svsudot_lane (s32, s8, u8); /* { dg-error {too few arguments to function 'svsudot_lane'} } */ - svsudot_lane (s32, s8, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane'} } */ -- svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to 
argument 1 of 'svsudot_lane', which expects an SVE vector type} } */ -+ svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */ - svsudot_lane (pg, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svbool_t' arguments} } */ - svsudot_lane (u8, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint8_t' arguments} } */ - svsudot_lane (f32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svfloat32_t' arguments} } */ - svsudot_lane (u32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint32_t' arguments} } */ - svsudot_lane (s32, s8, u8, 0); -- svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE vector type} } */ -- svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE vector type} } */ -+ svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */ -+ svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE type rather than a scalar} } */ - - svsudot_lane (s32, s8, u8, 0); - svsudot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot_lane', which expects a vector of signed integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c -index f95ac582f..c481996d3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c -@@ -23,12 +23,12 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, - { - svsudot (s32, s8); /* { dg-error {too few arguments to function 
'svsudot'} } */ - svsudot (s32, s8, u8, u8); /* { dg-error {too many arguments to function 'svsudot'} } */ -- svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE vector type} } */ -+ svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE type rather than a scalar} } */ - svsudot (pg, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svbool_t' arguments} } */ - svsudot (u8, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svuint8_t' arguments} } */ - svsudot (f32, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svfloat32_t' arguments} } */ - svsudot (s32, s8, u8); -- svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE vector type} } */ -+ svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE type rather than a scalar} } */ - svsudot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */ - svsudot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot', which expects a vector of unsigned integers} } */ - svsudot (s32, s8, 0); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -index d59ffab40..520c11f79 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -@@ -10,9 +10,9 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svmla_lane (f32, f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane'} } */ - svmla_lane (pg, pg, pg, 0); /* { dg-error {'svmla_lane' has no form that takes 'svbool_t' arguments} } */ - svmla_lane (s32, s32, s32, 0); /* { dg-error {ACLE function 'svmla_lane_s32' requires ISA extension 'sve2'} "" { xfail 
aarch64_sve2 } } */ -- svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE vector type} } */ -- svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE vector type} } */ -- svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE vector type} } */ -+ svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ -+ svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ -+ svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ - svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ - svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ - svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -index 68e51724c..3163d130c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -@@ -11,9 +11,9 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svcmla_lane (pg, pg, pg, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svbool_t' arguments} } */ - svcmla_lane (s32, s32, s32, 0, 90); /* { dg-error {ACLE function 'svcmla_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */ - 
svcmla_lane (f64, f64, f64, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svfloat64_t' arguments} } */ -- svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE vector type} } */ -- svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE vector type} } */ -- svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE vector type} } */ -+ svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ -+ svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ -+ svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ - svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ - svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ - svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c -index e20e1a122..dd67b4e4e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_lane_1.c -@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svmlalb_lane (u64, u32, u32); /* { dg-error {too few arguments to function 'svmlalb_lane'} } */ - 
svmlalb_lane (u64, u32, u32, 0, 0); /* { dg-error {too many arguments to function 'svmlalb_lane'} } */ -- svmlalb_lane (0, u16, u16, 0); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane', which expects an SVE vector type} } */ -+ svmlalb_lane (0, u16, u16, 0); /* { dg-error {passing 'int' to argument 1 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */ - svmlalb_lane (pg, u16, u16, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svbool_t' arguments} } */ - svmlalb_lane (u8, u8, u8, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svuint8_t' arguments} } */ - svmlalb_lane (u16, u8, u8, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svuint16_t' arguments} } */ - svmlalb_lane (f16, u16, u16, 0); /* { dg-error {'svmlalb_lane' has no form that takes 'svfloat16_t' arguments} } */ - svmlalb_lane (f32, f16, f16, 0); - svmlalb_lane (u32, u16, u16, 0); -- svmlalb_lane (u32, 0, u16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane', which expects an SVE vector type} } */ -+ svmlalb_lane (u32, 0, u16, 0); /* { dg-error {passing 'int' to argument 2 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */ - svmlalb_lane (u32, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svmlalb_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint16_t' respectively} } */ -- svmlalb_lane (u32, u16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane', which expects an SVE vector type} } */ -+ svmlalb_lane (u32, u16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svmlalb_lane', which expects an SVE type rather than a scalar} } */ - svmlalb_lane (u32, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svmlalb_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint16_t' respectively} } */ - svmlalb_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint16_t' to 
argument 2 of 'svmlalb_lane', after passing 'svuint32_t' to argument 1} } */ - svmlalb_lane (u32, u8, u16, 0); /* { dg-error {passing 'svuint8_t' instead of the expected 'svuint16_t' to argument 2 of 'svmlalb_lane', after passing 'svuint32_t' to argument 1} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c -index c6718cf37..157fd7cd5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_long_opt_n_1.c -@@ -10,13 +10,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint16_t u16, svuint32_t u32, - { - svabalb (u16, u8); /* { dg-error {too few arguments to function 'svabalb'} } */ - svabalb (u16, u8, u8, u8); /* { dg-error {too many arguments to function 'svabalb'} } */ -- svabalb (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svabalb', which expects an SVE vector type} } */ -+ svabalb (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svabalb', which expects an SVE type rather than a scalar} } */ - svabalb (pg, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svbool_t' arguments} } */ - svabalb (u8, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svuint8_t' arguments} } */ - svabalb (f16, u8, u8); /* { dg-error {'svabalb' has no form that takes 'svfloat16_t' arguments} } */ - svabalb (f32, f16, f16); /* { dg-error {'svabalb' has no form that takes 'svfloat32_t' arguments} } */ - svabalb (u16, u8, u8); -- svabalb (u16, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svabalb', which expects an SVE vector type} } */ -+ svabalb (u16, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svabalb', which expects an SVE type rather than a scalar} } */ - svabalb (u16, s8, u8); /* { dg-error {arguments 1 and 2 of 'svabalb' must have the same signedness, but the values passed here have type 'svuint16_t' and 
'svint8_t' respectively} } */ - svabalb (u16, u8, 0); - svabalb (u16, u8, s8); /* { dg-error {arguments 1 and 3 of 'svabalb' must have the same signedness, but the values passed here have type 'svuint16_t' and 'svint8_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -index c4a80e9da..ac789c2be 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svmla_x (pg, u8, u8, u8, u8); /* { dg-error {too many arguments to function 'svmla_x'} } */ - svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */ - svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */ -- svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE vector type} } */ -+ svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */ - svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, u8, u8); - svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, 0, u8); /* 
{ dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE vector type} } */ -+ svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */ - svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ - svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c -index e81552b64..c69b2d575 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c -@@ -9,13 +9,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */ - svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */ -- svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE vector type} } */ -+ svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ - svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */ - svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */ - svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */ - svdot_lane (u32, u8, u8, 0); -- svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to 
argument 2 of 'svdot_lane', which expects an SVE vector type} } */ -- svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE vector type} } */ -+ svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ -+ svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE type rather than a scalar} } */ - - svdot_lane (s32, s8, s8, 0); - svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c -index a748a8627..9e84e7a89 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_rotate_1.c -@@ -11,13 +11,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svcdot_lane (u32, u8, u8, 0); /* { dg-error {too few arguments to function 'svcdot_lane'} } */ - svcdot_lane (u32, u8, u8, 0, 0, 0); /* { dg-error {too many arguments to function 'svcdot_lane'} } */ -- svcdot_lane (0, u8, u8, 0, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot_lane', which expects an SVE vector type} } */ -+ svcdot_lane (0, u8, u8, 0, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */ - svcdot_lane (pg, u8, u8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 'svbool_t' arguments} } */ - svcdot_lane (s8, s8, s8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 'svint8_t' arguments} } */ - svcdot_lane (f32, s8, s8, 0, 0); /* { dg-error {'svcdot_lane' has no form that takes 
'svfloat32_t' arguments} } */ - svcdot_lane (s32, s8, s8, 0, 0); -- svcdot_lane (s32, 0, s8, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot_lane', which expects an SVE vector type} } */ -- svcdot_lane (s32, s8, 0, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot_lane', which expects an SVE vector type} } */ -+ svcdot_lane (s32, 0, s8, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */ -+ svcdot_lane (s32, s8, 0, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot_lane', which expects an SVE type rather than a scalar} } */ - - svcdot_lane (s32, s8, s8, 0, 0); - svcdot_lane (s32, u8, s8, 0, 0); /* { dg-error {arguments 1 and 2 of 'svcdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c -index fee4096fe..85d4b2dd8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c -@@ -8,12 +8,12 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, - { - svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */ - svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */ -- svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE vector type} } */ -+ svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE type rather than a scalar} } */ - svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */ - svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */ - svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' 
arguments} } */ - svdot (u32, u8, u8); -- svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE vector type} } */ -+ svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE type rather than a scalar} } */ - svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ - svdot (u32, u8, 0); - svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c -index 65e749ba7..9dd7eaf3c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_rotate_1.c -@@ -11,13 +11,13 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svcdot (u32, u8, u8); /* { dg-error {too few arguments to function 'svcdot'} } */ - svcdot (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svcdot'} } */ -- svcdot (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot', which expects an SVE vector type} } */ -+ svcdot (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svcdot', which expects an SVE type rather than a scalar} } */ - svcdot (pg, u8, u8, 0); /* { dg-error {'svcdot' has no form that takes 'svbool_t' arguments} } */ - svcdot (s8, s8, s8, 0); /* { dg-error {'svcdot' has no form that takes 'svint8_t' arguments} } */ - svcdot (f32, s8, s8, 0); /* { dg-error {'svcdot' has no form that takes 'svfloat32_t' arguments} } */ - svcdot (s32, s8, s8, 0); -- svcdot (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot', which expects an SVE 
vector type} } */ -- svcdot (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot', which expects an SVE vector type} } */ -+ svcdot (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svcdot', which expects an SVE type rather than a scalar} } */ -+ svcdot (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svcdot', which expects an SVE type rather than a scalar} } */ - - svcdot (s32, s8, s8, 0); - svcdot (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svcdot' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -index f340e3d1e..bb6740289 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -@@ -10,9 +10,9 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svcmla_x (f32, f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcmla_x', which expects 'svbool_t'} } */ - svcmla_x (pg, pg, pg, pg, 90); /* { dg-error {'svcmla_x' has no form that takes 'svbool_t' arguments} } */ - svcmla_x (pg, s32, s32, s32, 90); /* { dg-error {'svcmla_x' has no form that takes 'svint32_t' arguments} } */ -- svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE vector type} } */ -- svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE vector type} } */ -- svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE vector type} } */ -+ svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ -+ svcmla_x (pg, f32, 1, f32, 
90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ -+ svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ - svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ - svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ - svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -index 28111375f..cfe601631 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -@@ -12,10 +12,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, - const int one = 1; - pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */ - pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */ -- s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE vector type} } */ -+ s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */ - s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ - s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ -- s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to 
argument 2 of 'svsra', which expects an SVE vector type} } */ -+ s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */ - s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ - s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ - s8 = svsra (s8, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svsra', which expects a value in the range \[1, 8\]} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -index 711b6a133..5fb497701 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -@@ -13,8 +13,8 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, - svtbx (pg, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (pg, pg, u8); /* { dg-error {'svtbx' has no form that takes 'svbool_t' arguments} } */ - -- svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE vector type} } */ -- svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE vector type} } */ -+ svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */ -+ svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */ - svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */ - svtbx (u8, u8, u8); - svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ -@@ -29,7 
+29,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, - svtbx (s8, s8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (s8, s8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - -- svtbx (u16, 0, u16); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE vector type} } */ -+ svtbx (u16, 0, u16); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */ - svtbx (u16, u16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint16_t' and 'svuint8_t' respectively} } */ - svtbx (u16, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (u16, u16, u16); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c -index f52fb39bf..d1aad1de1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c -@@ -23,15 +23,15 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, - { - svusmmla (s32, u8); /* { dg-error {too few arguments to function 'svusmmla'} } */ - svusmmla (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusmmla'} } */ -- svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE type rather than a scalar} } */ - svusmmla (pg, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svbool_t' arguments} } */ - svusmmla (u8, u8, s8); /* { dg-error {'svusmmla' has no form 
that takes 'svuint8_t' arguments} } */ - svusmmla (f32, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svfloat32_t' arguments} } */ - svusmmla (s32, u8, s8); -- svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE type rather than a scalar} } */ - svusmmla (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmmla', which expects a vector of signed integers} } */ - svusmmla (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusmmla', which expects a vector of unsigned integers} } */ -- svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE type rather than a scalar} } */ - svusmmla (s32, u8, s8); - svusmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusmmla', after passing 'svint32_t' to argument 1} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c -index b40cfe9e8..0cc5c7497 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c -@@ -10,14 +10,14 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - { - svusdot_lane (s32, u8, s8); /* { dg-error {too few arguments to function 'svusdot_lane'} } */ - svusdot_lane (s32, u8, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane'} } */ -- svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE vector type} } */ -+ 
svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */ - svusdot_lane (pg, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svbool_t' arguments} } */ - svusdot_lane (u8, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint8_t' arguments} } */ - svusdot_lane (f32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svfloat32_t' arguments} } */ - svusdot_lane (u32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint32_t' arguments} } */ - svusdot_lane (s32, u8, s8, 0); -- svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE vector type} } */ -- svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE vector type} } */ -+ svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */ -+ svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE type rather than a scalar} } */ - - svusdot_lane (s32, u8, s8, 0); - svusdot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c -index 896b80390..f6585ae77 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c -@@ -23,12 +23,12 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, - { - svusdot (s32, u8); /* { dg-error {too few arguments to function 'svusdot'} } */ - svusdot (s32, u8, s8, u8); /* { dg-error {too many 
arguments to function 'svusdot'} } */ -- svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE vector type} } */ -+ svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE type rather than a scalar} } */ - svusdot (pg, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svbool_t' arguments} } */ - svusdot (u8, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svuint8_t' arguments} } */ - svusdot (f32, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svfloat32_t' arguments} } */ - svusdot (s32, u8, s8); -- svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE vector type} } */ -+ svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE type rather than a scalar} } */ - svusdot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot', which expects a vector of signed integers} } */ - svusdot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot', which expects a vector of unsigned integers} } */ - svusdot (s32, u8, 0); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -index 8b98fc24d..c2eda93e3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -@@ -9,8 +9,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svtmad (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svtmad'} } */ - svtmad (pg, pg, 0); /* { dg-error {'svtmad' has no form that takes 'svbool_t' arguments} } */ - svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */ -- svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE vector type} } */ -- svtmad (f32, 1, 0); /* { 
dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE vector type} } */ -+ svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */ -+ svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */ - svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */ - svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ - svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -index eef85a01d..8c865a0e6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -@@ -7,7 +7,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32) - { - svabs_m (s32, pg); /* { dg-error {too few arguments to function 'svabs_m'} } */ - svabs_m (s32, pg, s32, s32); /* { dg-error {too many arguments to function 'svabs_m'} } */ -- svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE vector type} } */ -+ svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE type rather than a scalar} } */ - svabs_m (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ - svabs_m (s32, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ - svabs_m (s32, pg, s32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c -index e94673a66..bf93e21a4 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c -@@ -9,7 +9,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8) - svabs_x (pg, s8, s8); /* { dg-error {too many arguments to function 'svabs_x'} } */ - svabs_x (s8, s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svabs_x', which expects 'svbool_t'} } */ - svabs_x (pg, pg); /* { dg-error {'svabs_x' has no form that takes 'svbool_t' arguments} } */ -- svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE vector type} } */ -+ svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE type rather than a scalar} } */ - svabs_x (pg, s8); - svabs_x (pg, u8); /* { dg-error {'svabs_x' has no form that takes 'svuint8_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c -index caa4e623d..f59ad590b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c -@@ -9,7 +9,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, - svcvt_f64_x (pg); /* { dg-error {too few arguments to function 'svcvt_f64_x'} } */ - svcvt_f64_x (pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_x'} } */ - svcvt_f64_x (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svcvt_f64_x', which expects 'svbool_t'} } */ -- svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE vector type} } */ -+ svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE type rather than a scalar} } */ - - svcvt_f64_x (pg, s8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint8_t' arguments} } */ - svcvt_f64_x (pg, s16); /* { dg-error {'svcvt_f64_x' has no form 
that takes 'svint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c -index ddbd93b69..2649fd694 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c -@@ -12,7 +12,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, - svcvt_f64_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ - svcvt_f64_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ - svcvt_f64_m (f64, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvt_f64_m', which expects 'svbool_t'} } */ -- svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE vector type} } */ -+ svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE type rather than a scalar} } */ - - svcvt_f64_m (f64, pg, s8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint8_t' arguments} } */ - svcvt_f64_m (f64, pg, s16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c -index 92c07b8c1..a5d56dec0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_narrowt_1.c -@@ -14,7 +14,7 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, - svcvtnt_f32_m (0, pg, f64); /* { dg-error {passing 'int' to argument 1 of 'svcvtnt_f32_m', which expects 'svfloat32_t'} } */ - svcvtnt_f32_m (pg, pg, f64); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvtnt_f32_m', which 
expects 'svfloat32_t'} } */ - svcvtnt_f32_m (f32, s32, f64); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvtnt_f32_m', which expects 'svbool_t'} } */ -- svcvtnt_f32_m (f32, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvtnt_f32_m', which expects an SVE vector type} } */ -+ svcvtnt_f32_m (f32, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvtnt_f32_m', which expects an SVE type rather than a scalar} } */ - - svcvtnt_f32_m (f32, pg, s8); /* { dg-error {'svcvtnt_f32_m' has no form that takes 'svint8_t' arguments} } */ - svcvtnt_f32_m (f32, pg, s16); /* { dg-error {'svcvtnt_f32_m' has no form that takes 'svint16_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c -index c03d644ed..c2465e3e2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_1.c -@@ -23,5 +23,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svqxtnb (u64); - svqxtnb (s64); - svqxtnb (f32); /* { dg-error {'svqxtnb' has no form that takes 'svfloat32_t' arguments} } */ -- svqxtnb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtnb', which expects an SVE vector type} } */ -+ svqxtnb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtnb', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c -index c3e210380..60051f80c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowb_to_uint_1.c -@@ -23,5 +23,5 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svqxtunb (u64); /* { dg-error {'svqxtunb' has no form that takes 'svuint64_t' arguments} } */ - svqxtunb 
(s64); - svqxtunb (f32); /* { dg-error {'svqxtunb' has no form that takes 'svfloat32_t' arguments} } */ -- svqxtunb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtunb', which expects an SVE vector type} } */ -+ svqxtunb (1); /* { dg-error {passing 'int' to argument 1 of 'svqxtunb', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c -index 4ed179cb3..a0612dcb7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_1.c -@@ -26,6 +26,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svqxtnt (u32, u64); - svqxtnt (s32, s64); - svqxtnt (f16, f32); /* { dg-error {'svqxtnt' has no form that takes 'svfloat32_t' arguments} } */ -- svqxtnt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtnt', which expects an SVE vector type} } */ -- svqxtnt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtnt', which expects an SVE vector type} } */ -+ svqxtnt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtnt', which expects an SVE type rather than a scalar} } */ -+ svqxtnt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtnt', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c -index acaa546ee..8e5fa5b3d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_narrowt_to_uint_1.c -@@ -26,6 +26,6 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svqxtunt (u32, u64); /* { dg-error {'svqxtunt' has no form that takes 'svuint64_t' arguments} } */ - svqxtunt (u32, s64); - svqxtunt (u16, f32); /* { 
dg-error {'svqxtunt' has no form that takes 'svfloat32_t' arguments} } */ -- svqxtunt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtunt', which expects an SVE vector type} } */ -- svqxtunt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtunt', which expects an SVE vector type} } */ -+ svqxtunt (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svqxtunt', which expects an SVE type rather than a scalar} } */ -+ svqxtunt (u8, 1); /* { dg-error {passing 'int' to argument 2 of 'svqxtunt', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c -index 517d11ff0..e2e172d2d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_int_1.c -@@ -10,7 +10,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, - { - svlogb_m (s32, pg); /* { dg-error {too few arguments to function 'svlogb_m'} } */ - svlogb_m (s32, pg, f32, s32); /* { dg-error {too many arguments to function 'svlogb_m'} } */ -- svlogb_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svlogb_m', which expects an SVE vector type} } */ -+ svlogb_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svlogb_m', which expects an SVE type rather than a scalar} } */ - svlogb_m (s32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svlogb_m', which expects 'svbool_t'} } */ - svlogb_m (s32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svlogb_m', which expects 'svbool_t'} } */ - svlogb_m (s32, pg, s32); /* { dg-error {'svlogb_m' has no form that takes 'svint32_t' arguments} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c -index 888b52513..b3cf0b9f5 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c -@@ -8,7 +8,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, - { - svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ - svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ -- svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ -+ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE type rather than a scalar} } */ - svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ - svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ - svclz_m (u32, pg, s32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c -index 233e847e9..da02d12fb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c -@@ -9,7 +9,7 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, - { - svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ - svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ -- svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ -+ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE type rather than a scalar} } */ - svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ - svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which 
expects 'svbool_t'} } */ - svclz_m (u32, pg, s32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c -index da57b07ea..858a2a5e0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c -@@ -9,6 +9,6 @@ f1 (svbool_t pg, svuint8_t u8) - svcnt_x (pg, u8, u8); /* { dg-error {too many arguments to function 'svcnt_x'} } */ - svcnt_x (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcnt_x', which expects 'svbool_t'} } */ - svcnt_x (pg, pg); /* { dg-error {'svcnt_x' has no form that takes 'svbool_t' arguments} } */ -- svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE vector type} } */ -+ svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE type rather than a scalar} } */ - svcnt_x (pg, u8); - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c -index 9c8acdf2d..e3275a8ce 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c -@@ -8,7 +8,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - { - svexpa (); /* { dg-error {too few arguments to function 'svexpa'} } */ - svexpa (u16, u16); /* { dg-error {too many arguments to function 'svexpa'} } */ -- svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE vector type} } */ -+ svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE type rather than a scalar} } */ - svexpa (pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ - svexpa (s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svexpa', 
which expects a vector of unsigned integers} } */ - svexpa (s16); /* { dg-error {passing 'svint16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c -index 95a97a72e..a194bd6ab 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c -@@ -8,8 +8,8 @@ test (svbool_t pg, svint8_t s8, svuint8_t u8, - { - svunpklo (); /* { dg-error {too few arguments to function 'svunpklo'} } */ - svunpklo (pg, s8); /* { dg-error {too many arguments to function 'svunpklo'} } */ -- svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ -- svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ -+ svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE type rather than a scalar} } */ -+ svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE type rather than a scalar} } */ - svunpklo (pg); - svunpklo (s8); - svunpklo (s16); --- -2.33.0 - diff --git a/0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch b/0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch deleted file mode 100644 index 840ad1d..0000000 --- a/0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch +++ /dev/null @@ -1,698 +0,0 @@ -From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:21 +0000 -Subject: [PATCH 068/157] [Backport][SME] aarch64: Replace vague "previous - arguments" message - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875 - -If an SVE ACLE intrinsic requires two arguments to have the -same type, 
the C resolver would report mismatches as "argument N -has type T2, but previous arguments had type T1". This patch makes -the message say which argument had type T1. - -This is needed to give decent error messages for some SME cases. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (function_resolver::require_matching_vector_type): Add a parameter - that specifies the number of the earlier argument that is being - matched against. - * config/aarch64/aarch64-sve-builtins.cc - (function_resolver::require_matching_vector_type): Likewise. - (require_derived_vector_type): Update calls accordingly. - (function_resolver::resolve_unary): Likewise. - (function_resolver::resolve_uniform): Likewise. - (function_resolver::resolve_uniform_opt_n): Likewise. - * config/aarch64/aarch64-sve-builtins-shapes.cc - (binary_long_lane_def::resolve): Likewise. - (clast_def::resolve, ternary_uint_def::resolve): Likewise. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous - arguments had" with "but argument N had". 
---- - .../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------ - gcc/config/aarch64/aarch64-sve-builtins.h | 3 +- - .../aarch64/sve/acle/general-c/binary_1.c | 6 ++-- - .../sve/acle/general-c/binary_lane_1.c | 2 +- - .../sve/acle/general-c/binary_long_lane_1.c | 2 +- - .../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++--- - .../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++--- - .../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++--- - .../sve/acle/general-c/binary_opt_n_2.c | 14 +++++----- - .../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------ - .../sve/acle/general-c/binary_rotate_1.c | 2 +- - .../sve/acle/general-c/binary_to_uint_1.c | 4 +-- - .../aarch64/sve/acle/general-c/clast_1.c | 2 +- - .../aarch64/sve/acle/general-c/compare_1.c | 14 +++++----- - .../sve/acle/general-c/compare_opt_n_1.c | 14 +++++----- - .../aarch64/sve/acle/general-c/create_1.c | 6 ++-- - .../aarch64/sve/acle/general-c/create_3.c | 6 ++-- - .../aarch64/sve/acle/general-c/create_5.c | 6 ++-- - .../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++----- - .../sve/acle/general-c/ternary_lane_1.c | 4 +-- - .../acle/general-c/ternary_lane_rotate_1.c | 4 +-- - .../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++---------- - .../sve/acle/general-c/ternary_rotate_1.c | 4 +-- - .../general-c/ternary_shift_right_imm_1.c | 6 ++-- - .../sve/acle/general-c/ternary_uint_1.c | 6 ++-- - .../aarch64/sve/acle/general-c/tmad_1.c | 2 +- - .../aarch64/sve/acle/general-c/unary_1.c | 8 +++--- - .../aarch64/sve/acle/general-c/undeclared_2.c | 2 +- - 29 files changed, 112 insertions(+), 110 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index 3ecef026c..40aa418e0 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0> - 
type_suffix_index type, result_type; - if (!r.check_gp_argument (3, i, nargs) - || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -- || !r.require_matching_vector_type (i + 1, type) -+ || !r.require_matching_vector_type (i + 1, i, type) - || !r.require_integer_immediate (i + 2) - || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) - return error_mark_node; -@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0> - { - type_suffix_index type; - if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -- || !r.require_matching_vector_type (i + 1, type)) -+ || !r.require_matching_vector_type (i + 1, i, type)) - return error_mark_node; - return r.resolve_to (MODE_none, type); - } -@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0> - type_suffix_index type; - if (!r.check_gp_argument (3, i, nargs) - || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -- || !r.require_matching_vector_type (i + 1, type) -+ || !r.require_matching_vector_type (i + 1, i, type) - || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) - return error_mark_node; - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 4e94e3633..1545fd78d 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno, - return true; - } - --/* Like require_vector_type, but TYPE is inferred from previous arguments -+/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO - rather than being a fixed part of the function signature. This changes - the nature of the error messages. 
*/ - bool - function_resolver::require_matching_vector_type (unsigned int argno, -+ unsigned int first_argno, - type_suffix_index type) - { - type_suffix_index new_type = infer_vector_type (argno); -@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno, - if (type != new_type) - { - error_at (location, "passing %qT to argument %d of %qE, but" -- " previous arguments had type %qT", -+ " argument %d had type %qT", - get_vector_type (new_type), argno + 1, fndecl, -- get_vector_type (type)); -+ first_argno + 1, get_vector_type (type)); - return false; - } - return true; -@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno, - { - /* There's no need to resolve this case out of order. */ - gcc_assert (argno > first_argno); -- return require_matching_vector_type (argno, first_type); -+ return require_matching_vector_type (argno, first_argno, first_type); - } - - /* Use FIRST_TYPE to get the expected type class and element size. */ -@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass, - so we can use normal left-to-right resolution. 
*/ - if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES - || !require_vector_type (1, VECTOR_TYPE_svbool_t) -- || !require_matching_vector_type (2, type)) -+ || !require_matching_vector_type (2, 0, type)) - return error_mark_node; - } - else -@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) - || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) - return error_mark_node; - -- i += 1; -+ unsigned int first_arg = i++; - for (; i < nargs - nimm; ++i) -- if (!require_matching_vector_type (i, type)) -+ if (!require_matching_vector_type (i, first_arg, type)) - return error_mark_node; - - for (; i < nargs; ++i) -@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops) - - unsigned int first_arg = i++; - for (; i < nargs - 1; ++i) -- if (!require_matching_vector_type (i, type)) -+ if (!require_matching_vector_type (i, first_arg, type)) - return error_mark_node; - - return finish_opt_n_resolution (i, first_arg, type); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 5a4f35123..f7d6cc084 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -476,7 +476,8 @@ public: - bool require_vector_or_scalar_type (unsigned int); - - bool require_vector_type (unsigned int, vector_type_index); -- bool require_matching_vector_type (unsigned int, type_suffix_index); -+ bool require_matching_vector_type (unsigned int, unsigned int, -+ type_suffix_index); - bool require_derived_vector_type (unsigned int, unsigned int, - type_suffix_index, - type_class_index = SAME_TYPE_CLASS, -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -index 4343146de..2e919d287 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -@@ -7,8 +7,8 @@ 
f1 (svbool_t pg, svuint8_t u8, svint16_t s16) - { - svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ - svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ -- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ -- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ -- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ -+ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */ -+ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ -+ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */ - svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -index 10b6b7e81..81533b25d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svmul_lane (s32, s32, 0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */ - svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ - svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */ -- svmul_lane (f32, f64, 0); /* { dg-error {passing 
'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but argument 1 had type 'svfloat32_t'} } */ - svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ - svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -index 805863f76..25b620877 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c -@@ -21,7 +21,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, - svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */ - svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ - svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */ -- svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */ -+ svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but argument 1 had type 'svuint32_t'} } */ - svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ - svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -index ee704eeae..1f513dde9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c -@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */ - svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */ - svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */ -- svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ -- svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ -- svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */ -- svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint16_t'} } */ -+ svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ -+ svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ -+ svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */ -+ svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint16_t'} } */ - svaddlb (u8, 0); - svaddlb (u16, 0); - svaddlb (u32, 0); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -index 8ca549ba9..4a29b5c43 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c -@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddhnb (s64, s64); - svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */ - svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */ -- svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ -- svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ -- svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ -- svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */ -+ svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ -+ svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ -+ svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ -+ svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */ - svaddhnb (u8, 0); /* { dg-error {'svaddhnb' has no form that takes 'svuint8_t' arguments} } */ - svaddhnb (u16, 0); - svaddhnb (u32, 0); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -index 2b537965b..4a442616e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c -@@ -28,10 +28,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */ - svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ - svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */ -- svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ -- svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ -- svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ -- svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */ -+ svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ -+ svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ -+ svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ -+ svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */ - svaddhnt (u8, u8, 0); /* { dg-error {'svaddhnt' has no form that takes 'svuint8_t' arguments} } */ - svaddhnt (u16, u16, 0); /* { dg-error {passing 'svuint16_t' instead of the expected 'svuint8_t' to argument 1 of 'svaddhnt', after passing 'svuint16_t' to argument 2} } */ - svaddhnt (s8, u16, 0); /* { dg-error {arguments 1 and 2 of 'svaddhnt' must have the same 
signedness, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -index a151f90d1..40447cf83 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ - svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ - svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */ -- svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ - svadd_x (pg, u8, u8); -- svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -- svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -- svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -- svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, f16); /* { 
dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */ - svadd_x (pg, u8, 0); - -- svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ -- svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ -+ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */ -+ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */ - svadd_x (pg, f16, f16); - svadd_x (pg, f16, 1); - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -index 70ec9c585..94e20bc91 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -@@ -11,19 +11,19 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ - svand_z (pg, pg, pg); - svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */ -- svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ - svand_z (pg, u8, u8); -- svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -- svand_z (pg, u8, u16); 
/* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -- svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -- svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ -+ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ -+ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ -+ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */ - svand_z (pg, u8, 0); - -- svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */ -+ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svbool_t'} } */ - svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */ - -- svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ -- svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ -+ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */ -+ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */ - svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } 
*/ - svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -index 7669e4a02..8939ce258 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ - svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ - svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */ -- svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ -+ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but argument 2 had type 'svfloat32_t'} } */ - svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ - svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ - svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -index 154662487..2c3fe5df1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c -@@ -12,8 +12,8 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32) 
- svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ - svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */ - svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ -- svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */ -- svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */ -+ svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svbool_t'} } */ -+ svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svint32_t'} } */ - svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */ - svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -index ba1b2520f..47ce47328 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -@@ -10,6 +10,6 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i) - svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ - svclasta (pg, i, s32); - svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */ -- svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 
'svint32_t'} } */ -+ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but argument 2 had type 'svint32_t'} } */ - svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -index 5474124cc..0dd0ad910 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c -@@ -13,15 +13,15 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */ - svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */ - svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */ -- svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -+ svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ - svmatch (pg, u8, u8); -- svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -- svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -- svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -- svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */ -+ svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ -+ svmatch (pg, u8, u16); /* 
{ dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ -+ svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ -+ svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */ - svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */ - -- svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ -- svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */ -+ svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */ -+ svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */ - svmatch (pg, f16, f16); /* { dg-error {'svmatch' has no form that takes 'svfloat16_t' arguments} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -index 6faa73972..cfa50d387 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ - svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ - svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */ -- svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 
'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ - svcmpeq (pg, u8, u8); -- svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -- svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -- svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -- svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */ - svcmpeq (pg, u8, 0); - -- svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ -- svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ -+ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */ -+ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */ - svcmpeq (pg, f16, f16); - svcmpeq (pg, f16, 1); - } -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -index 83e4a5600..7a617aa15 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -@@ -10,11 +10,11 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, - *ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */ - *ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } */ - *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ -- *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ -- *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ -+ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */ -+ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */ - *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */ -- *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */ - *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate2 (u8, u8); 
- *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -index e3302f7e7..40f3a1fed 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -@@ -11,11 +11,11 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, - *ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ - *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */ - *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ -- *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ -- *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ -+ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */ -+ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */ - *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */ -- *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate3 (pg, f16, f16); /* { 
dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but argument 1 had type 'svbool_t'} } */ - *ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate3 (f16, f16, f16); - *ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -index c850c94f0..bf3dd5d75 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -@@ -12,11 +12,11 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, - *ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */ - *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */ - *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */ -- *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ -- *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ -+ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but argument 1 had type 'svint32_t'} } */ -+ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but argument 1 had type 'svint32_t'} } */ - *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */ - *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error 
{passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */ -- *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */ - *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */ - *ptr = svcreate4 (s32, s32, s32, s32); - *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -index 7fc7bb67b..ca2ab8a6f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -@@ -44,13 +44,13 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, - svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ - - svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ -- svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -- svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -- svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -- svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -- svmmla (f32, f32, f16); /* { dg-error {passing 
'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -- svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -- svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -+ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ -+ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ -+ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ -+ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ -+ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */ -+ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ -+ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */ - - svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ - svmmla (f32, f32, f32); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -index 520c11f79..0a67f82bf 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather 
than a scalar} } */ - svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ - svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */ -- svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ -- svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */ -+ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */ - svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ - svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -index 3163d130c..60c9c466e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -@@ -14,8 +14,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, - svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ - svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */ - svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 
'svcmla_lane', which expects an SVE type rather than a scalar} } */ -- svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ -- svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */ -+ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */ - svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ - svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -index ac789c2be..6ca223475 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -@@ -11,24 +11,24 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, - svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */ - svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */ - svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */ -- svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ - 
svmla_x (pg, u8, u8, u8); -- svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ - svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */ -- svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -- svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ 
svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */ - svmla_x (pg, u8, u8, 0); - -- svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -- svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -- svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -- svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */ - svmla_x (pg, f16, f16, f16); - svmla_x (pg, f16, f16, 1); - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -index bb6740289..68b2cfc1d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ - svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ - svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */ -- svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ -- svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */ -+ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */ - svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ - svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ - svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -index cfe601631..134cf98fd 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c -@@ -11,10 +11,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, - { - const int one = 1; - pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */ -- pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */ -+ pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but argument 1 had type 'svbool_t'} } */ - s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */ -- s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ -- s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */ -+ s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */ -+ s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */ - s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */ - s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ - s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -index 5fb497701..a639562b1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c -@@ 
-15,14 +15,14 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, - - svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */ - svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */ -- svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */ -+ svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svuint8_t'} } */ - svtbx (u8, u8, u8); - svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (u8, u8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ - svtbx (u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - -- svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svint8_t'} } */ -+ svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svint8_t'} } */ - svtbx (s8, s8, u8); - svtbx (s8, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (s8, s8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ -@@ -36,7 +36,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, - svtbx (u16, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which 
expects a vector of unsigned integers} } */ - svtbx (u16, u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - -- svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but previous arguments had type 'svint16_t'} } */ -+ svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but argument 1 had type 'svint16_t'} } */ - svtbx (s16, s16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */ - svtbx (s16, s16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */ - svtbx (s16, s16, u16); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -index c2eda93e3..992b50199 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) - svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */ - svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */ - svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */ -- svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */ -+ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but argument 1 had type 'svfloat32_t'} } */ - svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ - svtmad (f32, f32, i); /* { dg-error {argument 3 of 
'svtmad' must be an integer constant expression} } */ - svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -index 8c865a0e6..9c9c383dd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -@@ -13,9 +13,9 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32) - svabs_m (s32, pg, s32); - svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */ - svabs_m (f32, pg, f32); -- svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -- svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -- svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -- svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */ -+ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */ -+ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */ -+ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */ -+ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svbool_t'} } */ - svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */ - } -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c -index 7e869bda8..6ffd3d9e8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c -@@ -9,7 +9,7 @@ f (svint8_t s8, svuint16_t u16, svfloat32_t f32, - u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */ - f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */ - s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */ -- u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */ -+ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but argument 1 had type 'svuint16_t'} } */ - f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */ - pg = svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ - } --- -2.33.0 - diff --git a/0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch b/0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch deleted file mode 100644 index d293e4e..0000000 --- a/0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch +++ /dev/null @@ -1,368 +0,0 @@ -From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:21 +0000 -Subject: [PATCH 069/157] [Backport][SME] aarch64: Make more use of sve_type in - ACLE code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd - -This patch makes some functions operate on sve_type, rather than just -on type suffixes. 
It also allows an overload to be resolved based on -a mode and sve_type. In this case the sve_type is used to derive the -group size as well as a type suffix. - -This is needed for the SME2 intrinsics and the new tuple forms of -svreinterpret. No functional change intended on its own. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (function_resolver::lookup_form): Add an overload that takes - an sve_type rather than type and group suffixes. - (function_resolver::resolve_to): Likewise. - (function_resolver::infer_vector_or_tuple_type): Return an sve_type. - (function_resolver::infer_tuple_type): Likewise. - (function_resolver::require_matching_vector_type): Take an sve_type - rather than a type_suffix_index. - (function_resolver::require_derived_vector_type): Likewise. - * config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group): - New function. - (function_resolver::lookup_form): Add an overload that takes - an sve_type rather than type and group suffixes. - (function_resolver::resolve_to): Likewise. - (function_resolver::infer_vector_or_tuple_type): Return an sve_type. - (function_resolver::infer_tuple_type): Likewise. - (function_resolver::infer_vector_type): Update accordingly. - (function_resolver::require_matching_vector_type): Take an sve_type - rather than a type_suffix_index. - (function_resolver::require_derived_vector_type): Likewise. - * config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve) - (set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update - calls accordingly. 
---- - .../aarch64/aarch64-sve-builtins-shapes.cc | 16 +-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++----- - gcc/config/aarch64/aarch64-sve-builtins.h | 12 +- - 3 files changed, 95 insertions(+), 44 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index 40aa418e0..f187b4cb2 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0> - resolve (function_resolver &r) const OVERRIDE - { - unsigned int i, nargs; -- type_suffix_index type; -+ sve_type type; - if (!r.check_gp_argument (2, i, nargs) -- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !(type = r.infer_tuple_type (i)) - || !r.require_integer_immediate (i + 1)) - return error_mark_node; - -@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0> - resolve (function_resolver &r) const OVERRIDE - { - unsigned int i, nargs; -- type_suffix_index type; -+ sve_type type; - if (!r.check_gp_argument (3, i, nargs) -- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !(type = r.infer_tuple_type (i)) - || !r.require_integer_immediate (i + 1) - || !r.require_derived_vector_type (i + 2, i, type)) - return error_mark_node; -@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0> - gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); - - unsigned int i, nargs; -- type_suffix_index type; -+ sve_type type; - if (!r.check_gp_argument (vnum_p ? 
3 : 2, i, nargs) - || !r.require_pointer_type (i) - || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) -- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) -+ || !(type = r.infer_tuple_type (nargs - 1))) - return error_mark_node; - - return r.resolve_to (r.mode_suffix_id, type); -@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0> - resolve (function_resolver &r) const OVERRIDE - { - unsigned int i, nargs; -- type_suffix_index type; -+ sve_type type; - if (!r.check_gp_argument (2, i, nargs) -- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !(type = r.infer_tuple_type (i)) - || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) - return error_mark_node; - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 1545fd78d..e98274f8a 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type) - return NUM_TYPE_SUFFIXES; - } - -+/* Return the implicit group suffix for intrinsics that operate on NVECTORS -+ vectors. */ -+static group_suffix_index -+num_vectors_to_group (unsigned int nvectors) -+{ -+ switch (nvectors) -+ { -+ case 1: return GROUP_none; -+ case 2: return GROUP_x2; -+ case 3: return GROUP_x3; -+ case 4: return GROUP_x4; -+ } -+ gcc_unreachable (); -+} -+ - /* Return the vector type associated with TYPE. */ - static tree - get_vector_type (sve_type type) -@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode, - return rfn ? rfn->decl : NULL_TREE; - } - -+/* Silently check whether there is an instance of the function that has the -+ mode suffix given by MODE and the type and group suffixes implied by TYPE. -+ If the overloaded function has an explicit first type suffix (like -+ conversions do), TYPE describes the implicit second type suffix. -+ Otherwise, TYPE describes the only type suffix. 
-+ -+ Return the decl of the function if it exists, otherwise return null. */ -+tree -+function_resolver::lookup_form (mode_suffix_index mode, sve_type type) -+{ -+ type_suffix_index type0 = type_suffix_ids[0]; -+ type_suffix_index type1 = type_suffix_ids[1]; -+ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type; -+ -+ group_suffix_index group = group_suffix_id; -+ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ()) -+ group = num_vectors_to_group (type.num_vectors); -+ -+ return lookup_form (mode, type0, type1, group); -+} -+ - /* Resolve the function to one with the mode suffix given by MODE, the - type suffixes given by TYPE0 and TYPE1, and group suffix given by - GROUP. Return its function decl on success, otherwise report an -@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode, - return res; - } - -+/* Resolve the function to one that has the suffixes associated with MODE -+ and TYPE; see lookup_form for how TYPE is interpreted. Return the -+ function decl on success, otherwise report an error and return -+ error_mark_node. */ -+tree -+function_resolver::resolve_to (mode_suffix_index mode, sve_type type) -+{ -+ if (tree res = lookup_form (mode, type)) -+ return res; -+ -+ return report_no_such_form (type); -+} -+ - /* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. - Return the associated type suffix on success, otherwise report an - error and return NUM_TYPE_SUFFIXES. */ -@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno) - - /* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS - vectors; NUM_VECTORS is 1 for the former. Return the associated type -- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error -- and return NUM_TYPE_SUFFIXES on failure. */ --type_suffix_index -+ on success. Report an error on failure. 
*/ -+sve_type - function_resolver::infer_vector_or_tuple_type (unsigned int argno, - unsigned int num_vectors) - { - auto type = infer_sve_type (argno); - if (!type) -- return NUM_TYPE_SUFFIXES; -+ return type; - - if (type.num_vectors == num_vectors) -- return type.type; -+ return type; - - report_incorrect_num_vectors (argno, type, num_vectors); -- return NUM_TYPE_SUFFIXES; -+ return {}; - } - - /* Require argument ARGNO to have some form of vector type. Return the -@@ -1447,7 +1495,9 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno, - type_suffix_index - function_resolver::infer_vector_type (unsigned int argno) - { -- return infer_vector_or_tuple_type (argno, 1); -+ if (auto type = infer_vector_or_tuple_type (argno, 1)) -+ return type.type; -+ return NUM_TYPE_SUFFIXES; - } - - /* Like infer_vector_type, but also require the type to be integral. */ -@@ -1512,10 +1562,9 @@ function_resolver::infer_sd_vector_type (unsigned int argno) - - /* If the function operates on tuples of vectors, require argument ARGNO to be - a tuple with the appropriate number of vectors, otherwise require it to be -- a single vector. Return the associated type suffix on success, using -- TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES -+ a single vector. Return the associated type on success. Report an error - on failure. 
*/ --type_suffix_index -+sve_type - function_resolver::infer_tuple_type (unsigned int argno) - { - return infer_vector_or_tuple_type (argno, vectors_per_tuple ()); -@@ -1567,10 +1616,10 @@ function_resolver::require_vector_type (unsigned int argno, - bool - function_resolver::require_matching_vector_type (unsigned int argno, - unsigned int first_argno, -- type_suffix_index type) -+ sve_type type) - { -- type_suffix_index new_type = infer_vector_type (argno); -- if (new_type == NUM_TYPE_SUFFIXES) -+ sve_type new_type = infer_sve_type (argno); -+ if (!new_type) - return false; - - if (type != new_type) -@@ -1613,15 +1662,13 @@ function_resolver::require_matching_vector_type (unsigned int argno, - bool function_resolver:: - require_derived_vector_type (unsigned int argno, - unsigned int first_argno, -- type_suffix_index first_type, -+ sve_type first_type, - type_class_index expected_tclass, - unsigned int expected_bits) - { - /* If the type needs to match FIRST_ARGNO exactly, use the preferred -- error message for that case. The VECTOR_TYPE_P test excludes tuple -- types, which we handle below instead. */ -- bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno)); -- if (both_vectors_p -+ error message for that case. */ -+ if (first_type.num_vectors == 1 - && expected_tclass == SAME_TYPE_CLASS - && expected_bits == SAME_SIZE) - { -@@ -1631,17 +1678,18 @@ require_derived_vector_type (unsigned int argno, - } - - /* Use FIRST_TYPE to get the expected type class and element size. 
*/ -+ auto &first_type_suffix = type_suffixes[first_type.type]; - type_class_index orig_expected_tclass = expected_tclass; - if (expected_tclass == NUM_TYPE_CLASSES) -- expected_tclass = type_suffixes[first_type].tclass; -+ expected_tclass = first_type_suffix.tclass; - - unsigned int orig_expected_bits = expected_bits; - if (expected_bits == SAME_SIZE) -- expected_bits = type_suffixes[first_type].element_bits; -+ expected_bits = first_type_suffix.element_bits; - else if (expected_bits == HALF_SIZE) -- expected_bits = type_suffixes[first_type].element_bits / 2; -+ expected_bits = first_type_suffix.element_bits / 2; - else if (expected_bits == QUARTER_SIZE) -- expected_bits = type_suffixes[first_type].element_bits / 4; -+ expected_bits = first_type_suffix.element_bits / 4; - - /* If the expected type doesn't depend on FIRST_TYPE at all, - just check for the fixed choice of vector type. */ -@@ -1655,13 +1703,14 @@ require_derived_vector_type (unsigned int argno, - - /* Require the argument to be some form of SVE vector type, - without being specific about the type of vector we want. */ -- type_suffix_index actual_type = infer_vector_type (argno); -- if (actual_type == NUM_TYPE_SUFFIXES) -+ sve_type actual_type = infer_vector_type (argno); -+ if (!actual_type) - return false; - - /* Exit now if we got the right type. */ -- bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass); -- bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits); -+ auto &actual_type_suffix = type_suffixes[actual_type.type]; -+ bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass); -+ bool size_ok_p = (actual_type_suffix.element_bits == expected_bits); - if (tclass_ok_p && size_ok_p) - return true; - -@@ -1701,7 +1750,9 @@ require_derived_vector_type (unsigned int argno, - - /* If the arguments have consistent type classes, but a link between - the sizes has been broken, try to describe the error in those terms. 
*/ -- if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE) -+ if (first_type.num_vectors == 1 -+ && tclass_ok_p -+ && orig_expected_bits == SAME_SIZE) - { - if (argno < first_argno) - { -@@ -1718,11 +1769,11 @@ require_derived_vector_type (unsigned int argno, - - /* Likewise in reverse: look for cases in which the sizes are consistent - but a link between the type classes has been broken. */ -- if (both_vectors_p -+ if (first_type.num_vectors == 1 - && size_ok_p - && orig_expected_tclass == SAME_TYPE_CLASS -- && type_suffixes[first_type].integer_p -- && type_suffixes[actual_type].integer_p) -+ && first_type_suffix.integer_p -+ && actual_type_suffix.integer_p) - { - if (argno < first_argno) - { -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index f7d6cc084..a7cfff7c1 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -458,28 +458,28 @@ public: - type_suffix_index = NUM_TYPE_SUFFIXES, - type_suffix_index = NUM_TYPE_SUFFIXES, - group_suffix_index = GROUP_none); -+ tree lookup_form (mode_suffix_index, sve_type); - tree resolve_to (mode_suffix_index, - type_suffix_index = NUM_TYPE_SUFFIXES, - type_suffix_index = NUM_TYPE_SUFFIXES, - group_suffix_index = GROUP_none); -+ tree resolve_to (mode_suffix_index, sve_type); - - type_suffix_index infer_integer_scalar_type (unsigned int); - type_suffix_index infer_pointer_type (unsigned int, bool = false); - sve_type infer_sve_type (unsigned int); -- type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); -+ sve_type infer_vector_or_tuple_type (unsigned int, unsigned int); - type_suffix_index infer_vector_type (unsigned int); - type_suffix_index infer_integer_vector_type (unsigned int); - type_suffix_index infer_unsigned_vector_type (unsigned int); - type_suffix_index infer_sd_vector_type (unsigned int); -- type_suffix_index infer_tuple_type (unsigned int); -+ sve_type infer_tuple_type 
(unsigned int); - - bool require_vector_or_scalar_type (unsigned int); - - bool require_vector_type (unsigned int, vector_type_index); -- bool require_matching_vector_type (unsigned int, unsigned int, -- type_suffix_index); -- bool require_derived_vector_type (unsigned int, unsigned int, -- type_suffix_index, -+ bool require_matching_vector_type (unsigned int, unsigned int, sve_type); -+ bool require_derived_vector_type (unsigned int, unsigned int, sve_type, - type_class_index = SAME_TYPE_CLASS, - unsigned int = SAME_SIZE); - --- -2.33.0 - diff --git a/0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch b/0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch deleted file mode 100644 index f2f2fd0..0000000 --- a/0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:22 +0000 -Subject: [PATCH 070/157] [Backport][SME] aarch64: Tweak error message for - (tuple,vector) pairs - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958 - -SME2 adds more intrinsics that take a tuple of vectors followed -by a single vector, with the two arguments expected to have the -same element type. Unlike with the existing svset* intrinsics, -the size of the tuple is not fixed by the overloaded function name. - -This patch adds an error message that (hopefully) copes better -with that combination. - -gcc/ - * config/aarch64/aarch64-sve-builtins.cc - (function_resolver::require_derived_vector_type): Add a specific - error message for the case in which the caller wants a single - vector whose element type matches a previous tuyple argument. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected - error message. - * gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise. 
---- - gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++ - .../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++-- - .../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++-- - .../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++-- - 4 files changed, 19 insertions(+), 6 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index e98274f8a..9224916a7 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno, - if (!actual_type) - return false; - -+ if (orig_expected_tclass == SAME_TYPE_CLASS -+ && orig_expected_bits == SAME_SIZE) -+ { -+ if (actual_type.type == first_type.type) -+ return true; -+ -+ error_at (location, "passing %qT to argument %d of %qE, but" -+ " argument %d was a tuple of %qT", -+ get_vector_type (actual_type), argno + 1, fndecl, -+ first_argno + 1, get_vector_type (first_type.type)); -+ return false; -+ } -+ - /* Exit now if we got the right type. 
*/ - auto &actual_type_suffix = type_suffixes[actual_type.type]; - bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c -index f07c76102..f2a6da536 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c -@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) - u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ - u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ - u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */ -- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ -- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ -+ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ -+ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */ - u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ - u8x2 = svset2 (u8x2, 0, u8); - f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c -index 543a1bea8..92b955f83 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c -@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, - f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ - f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ - f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */ -- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ -- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ -+ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ -+ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */ - f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ - f16x3 = svset3 (f16x3, 0, f16); - f64 = svset3 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c -index be911a731..f0696fb07 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c -@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) - s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ - s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ - s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ -- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ -- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ -+ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ -+ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */ - s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ - s32x4 = svset4 (s32x4, 0, s32); - f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ --- -2.33.0 - diff --git a/0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch b/0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch deleted file mode 100644 index b4807e2..0000000 --- a/0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch +++ /dev/null @@ -1,1236 +0,0 @@ -From 95234ef07c47dda7ac6a13f75619580a6683118c Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:22 +0000 -Subject: [PATCH 071/157] 
[Backport][SME] aarch64: Add tuple forms of - svreinterpret - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ce9dc263c2f6d455b2013fc58932beda2a4ae92 - -SME2 adds a number of intrinsics that operate on tuples of 2 and 4 -vectors. The ACLE therefore extends the existing svreinterpret -intrinsics to handle tuples as well. - -gcc/ - * config/aarch64/aarch64-sve-builtins-base.cc - (svreinterpret_impl::fold): Punt on tuple forms. - (svreinterpret_impl::expand): Use tuple_mode instead of vector_mode. - * config/aarch64/aarch64-sve-builtins-base.def (svreinterpret): - Extend to x1234 groups. - * config/aarch64/aarch64-sve-builtins-functions.h - (multi_vector_function::vectors_per_tuple): If the function has - a group suffix, get the number of vectors from there. - * config/aarch64/aarch64-sve-builtins-shapes.h (reinterpret): Declare. - * config/aarch64/aarch64-sve-builtins-shapes.cc (reinterpret_def) - (reinterpret): New function shape. - * config/aarch64/aarch64-sve-builtins.cc (function_groups): Handle - DEF_SVE_FUNCTION_GS. - * config/aarch64/aarch64-sve-builtins.def (DEF_SVE_FUNCTION_GS): New - macro. - (DEF_SVE_FUNCTION): Forward to DEF_SVE_FUNCTION_GS by default. - * config/aarch64/aarch64-sve-builtins.h - (function_instance::tuple_mode): New member function. - (function_base::vectors_per_tuple): Take the function instance - as argument and get the number from the group suffix. - (function_instance::vectors_per_tuple): Update accordingly. - * config/aarch64/iterators.md (SVE_FULLx2, SVE_FULLx3, SVE_FULLx4) - (SVE_ALL_STRUCT): New mode iterators. - (SVE_STRUCT): Redefine in terms of SVE_FULL*. - * config/aarch64/aarch64-sve.md (@aarch64_sve_reinterpret) - (*aarch64_sve_reinterpret): Extend to SVE structure modes. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_DUAL_XN): - New macro. - * gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c: Add tests for - tuple forms. 
- * gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c: Likewise. ---- - .../aarch64/aarch64-sve-builtins-base.cc | 5 +- - .../aarch64/aarch64-sve-builtins-base.def | 2 +- - .../aarch64/aarch64-sve-builtins-functions.h | 7 ++- - .../aarch64/aarch64-sve-builtins-shapes.cc | 28 +++++++++ - .../aarch64/aarch64-sve-builtins-shapes.h | 1 + - gcc/config/aarch64/aarch64-sve-builtins.cc | 8 ++- - gcc/config/aarch64/aarch64-sve-builtins.def | 8 ++- - gcc/config/aarch64/aarch64-sve-builtins.h | 20 +++++- - gcc/config/aarch64/aarch64-sve.md | 8 +-- - gcc/config/aarch64/iterators.md | 26 +++++--- - .../aarch64/sve/acle/asm/reinterpret_bf16.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_f16.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_f32.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_f64.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_s16.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_s32.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_s64.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_s8.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_u16.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_u32.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_u64.c | 62 
+++++++++++++++++++ - .../aarch64/sve/acle/asm/reinterpret_u8.c | 62 +++++++++++++++++++ - .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +++++ - 23 files changed, 851 insertions(+), 20 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -index c9bf13792..53f3f28f9 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -@@ -1928,6 +1928,9 @@ public: - gimple * - fold (gimple_folder &f) const OVERRIDE - { -+ if (f.vectors_per_tuple () > 1) -+ return NULL; -+ - /* Punt to rtl if the effect of the reinterpret on registers does not - conform to GCC's endianness model. */ - if (!targetm.can_change_mode_class (f.vector_mode (0), -@@ -1944,7 +1947,7 @@ public: - rtx - expand (function_expander &e) const OVERRIDE - { -- machine_mode mode = e.vector_mode (0); -+ machine_mode mode = e.tuple_mode (0); - return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); - } - }; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def -index 3a58f76c3..756469959 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def -@@ -248,7 +248,7 @@ DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) - DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) - DEF_SVE_FUNCTION (svrecps, binary, all_float, none) - DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) --DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) -+DEF_SVE_FUNCTION_GS (svreinterpret, reinterpret, reinterpret, x1234, none) - DEF_SVE_FUNCTION (svrev, unary, all_data, none) - DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) - DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -index 9d346b6ff..94a6d1207 100644 ---- 
a/gcc/config/aarch64/aarch64-sve-builtins-functions.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -@@ -59,8 +59,13 @@ public: - : m_vectors_per_tuple (vectors_per_tuple) {} - - unsigned int -- vectors_per_tuple () const OVERRIDE -+ vectors_per_tuple (const function_instance &fi) const override - { -+ if (fi.group_suffix_id != GROUP_none) -+ { -+ gcc_checking_assert (m_vectors_per_tuple == 1); -+ return fi.group_suffix ().vectors_per_tuple; -+ } - return m_vectors_per_tuple; - } - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index f187b4cb2..95e40d8f3 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -2400,6 +2400,34 @@ struct reduction_wide_def : public overloaded_base<0> - }; - SHAPE (reduction_wide) - -+/* svx_t svfoo_t0[_t1_g](svx_t) -+ -+ where the target type must be specified explicitly but the source -+ type can be inferred. 
*/ -+struct reinterpret_def : public overloaded_base<1> -+{ -+ bool explicit_group_suffix_p () const override { return false; } -+ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "t0,t1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const override -+ { -+ sve_type type; -+ if (!r.check_num_arguments (1) -+ || !(type = r.infer_sve_type (0))) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (reinterpret) -+ - /* svxN_t svfoo[_t0](svxN_t, uint64_t, sv_t) - - where the second argument is an integer constant expression in the -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -index 3b0025f85..2b06152d4 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -@@ -133,6 +133,7 @@ namespace aarch64_sve - extern const function_shape *const rdffr; - extern const function_shape *const reduction; - extern const function_shape *const reduction_wide; -+ extern const function_shape *const reinterpret; - extern const function_shape *const set; - extern const function_shape *const setffr; - extern const function_shape *const shift_left_imm_long; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 9224916a7..c439f2e8a 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -494,6 +494,10 @@ static const group_suffix_index groups_none[] = { - GROUP_none, NUM_GROUP_SUFFIXES - }; - -+static const group_suffix_index groups_x1234[] = { -+ GROUP_none, GROUP_x2, GROUP_x3, GROUP_x4, NUM_GROUP_SUFFIXES -+}; -+ - /* Used by functions that have no governing predicate. 
*/ - static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; - -@@ -534,8 +538,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; - - /* A list of all SVE ACLE functions. */ - static CONSTEXPR const function_group_info function_groups[] = { --#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ -- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \ -+#define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ -+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ - preds_##PREDS, REQUIRED_EXTENSIONS }, - #include "aarch64-sve-builtins.def" - }; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def -index d9bf9c350..be10b5ea1 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins.def -@@ -33,8 +33,13 @@ - #define DEF_SVE_GROUP_SUFFIX(A, B, C) - #endif - -+#ifndef DEF_SVE_FUNCTION_GS -+#define DEF_SVE_FUNCTION_GS(A, B, C, D, E) -+#endif -+ - #ifndef DEF_SVE_FUNCTION --#define DEF_SVE_FUNCTION(A, B, C, D) -+#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ -+ DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) - #endif - - DEF_SVE_MODE (n, none, none, none) -@@ -107,6 +112,7 @@ DEF_SVE_GROUP_SUFFIX (x4, 0, 4) - #include "aarch64-sve-builtins-sve2.def" - - #undef DEF_SVE_FUNCTION -+#undef DEF_SVE_FUNCTION_GS - #undef DEF_SVE_GROUP_SUFFIX - #undef DEF_SVE_TYPE_SUFFIX - #undef DEF_SVE_TYPE -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index a7cfff7c1..7132b6e77 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -364,6 +364,7 @@ public: - tree tuple_type (unsigned int) const; - unsigned int elements_per_vq (unsigned int i) const; - machine_mode vector_mode (unsigned int) const; -+ machine_mode tuple_mode (unsigned int) const; - machine_mode gp_mode (unsigned int) const; 
- - /* The properties of the function. */ -@@ -664,7 +665,7 @@ public: - - /* If the function operates on tuples of vectors, return the number - of vectors in the tuples, otherwise return 1. */ -- virtual unsigned int vectors_per_tuple () const { return 1; } -+ virtual unsigned int vectors_per_tuple (const function_instance &) const; - - /* If the function addresses memory, return the type of a single - scalar memory element. */ -@@ -836,7 +837,7 @@ function_instance::operator!= (const function_instance &other) const - inline unsigned int - function_instance::vectors_per_tuple () const - { -- return base->vectors_per_tuple (); -+ return base->vectors_per_tuple (*this); - } - - /* If the function addresses memory, return the type of a single -@@ -940,6 +941,15 @@ function_instance::vector_mode (unsigned int i) const - return type_suffix (i).vector_mode; - } - -+/* Return the mode of tuple_type (I). */ -+inline machine_mode -+function_instance::tuple_mode (unsigned int i) const -+{ -+ if (group_suffix ().vectors_per_tuple > 1) -+ return TYPE_MODE (tuple_type (i)); -+ return vector_mode (i); -+} -+ - /* Return the mode of the governing predicate to use when operating on - type suffix I. */ - inline machine_mode -@@ -966,6 +976,12 @@ function_base::call_properties (const function_instance &instance) const - return flags; - } - -+inline unsigned int -+function_base::vectors_per_tuple (const function_instance &instance) const -+{ -+ return instance.group_suffix ().vectors_per_tuple; -+} -+ - /* Return the mode of the result of a call. */ - inline machine_mode - function_expander::result_mode () const -diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md -index b8cc47ef5..28b73d807 100644 ---- a/gcc/config/aarch64/aarch64-sve.md -+++ b/gcc/config/aarch64/aarch64-sve.md -@@ -784,8 +784,8 @@ - ;; This is equivalent to a subreg on little-endian targets but not for - ;; big-endian; see the comment at the head of the file for details. 
- (define_expand "@aarch64_sve_reinterpret" -- [(set (match_operand:SVE_ALL 0 "register_operand") -- (unspec:SVE_ALL -+ [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand") -+ (unspec:SVE_ALL_STRUCT - [(match_operand 1 "aarch64_any_register_operand")] - UNSPEC_REINTERPRET))] - "TARGET_SVE" -@@ -802,8 +802,8 @@ - ;; A pattern for handling type punning on big-endian targets. We use a - ;; special predicate for operand 1 to reduce the number of patterns. - (define_insn_and_split "*aarch64_sve_reinterpret" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -+ [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w") -+ (unspec:SVE_ALL_STRUCT - [(match_operand 1 "aarch64_any_register_operand" "w")] - UNSPEC_REINTERPRET))] - "TARGET_SVE" -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index a8a39b65a..8dd2035bc 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -451,14 +451,6 @@ - (define_mode_iterator VNx2DI_ONLY [VNx2DI]) - (define_mode_iterator VNx2DF_ONLY [VNx2DF]) - --;; All SVE vector structure modes. --(define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI -- VNx16BF VNx16HF VNx8SF VNx4DF -- VNx48QI VNx24HI VNx12SI VNx6DI -- VNx24BF VNx24HF VNx12SF VNx6DF -- VNx64QI VNx32HI VNx16SI VNx8DI -- VNx32BF VNx32HF VNx16SF VNx8DF]) -- - ;; All fully-packed SVE vector modes. - (define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI - VNx8BF VNx8HF VNx4SF VNx2DF]) -@@ -530,6 +522,24 @@ - VNx2DI - VNx2DF]) - -+;; All SVE 2-vector modes. -+(define_mode_iterator SVE_FULLx2 [VNx32QI VNx16HI VNx8SI VNx4DI -+ VNx16BF VNx16HF VNx8SF VNx4DF]) -+ -+;; All SVE 3-vector modes. -+(define_mode_iterator SVE_FULLx3 [VNx48QI VNx24HI VNx12SI VNx6DI -+ VNx24BF VNx24HF VNx12SF VNx6DF]) -+ -+;; All SVE 4-vector modes. -+(define_mode_iterator SVE_FULLx4 [VNx64QI VNx32HI VNx16SI VNx8DI -+ VNx32BF VNx32HF VNx16SF VNx8DF]) -+ -+;; All SVE vector structure modes. 
-+(define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4]) -+ -+;; All SVE vector and structure modes. -+(define_mode_iterator SVE_ALL_STRUCT [SVE_ALL SVE_STRUCT]) -+ - ;; All SVE integer vector modes. - (define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI - VNx8HI VNx4HI VNx2HI -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c -index 2d2c2a714..dd0daf2ef 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_bf16_u64_tied1, svbfloat16_t, svuint64_t, - TEST_DUAL_Z (reinterpret_bf16_u64_untied, svbfloat16_t, svuint64_t, - z0 = svreinterpret_bf16_u64 (z4), - z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_bf16_x2_tied1, svbfloat16x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_bf16_bf16_x2 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_bf16_f32_x2_untied, svbfloat16x2_t, svfloat32x2_t, z0, -+ svreinterpret_bf16_f32_x2 (z4), -+ svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_s64_x3_tied1, svbfloat16x3_t, svint64x3_t, -+ z0_res = svreinterpret_bf16_s64_x3 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_bf16_u8_x3_untied, svbfloat16x3_t, svuint8x3_t, z18, -+ svreinterpret_bf16_u8_x3 (z23), -+ svreinterpret_bf16 (z23)) -+ -+/* -+** reinterpret_bf16_u32_x4_tied1: -+** ret 
-+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_u32_x4_tied1, svbfloat16x4_t, svuint32x4_t, -+ z0_res = svreinterpret_bf16_u32_x4 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_bf16_f64_x4_untied, svbfloat16x4_t, svfloat64x4_t, z28, -+ svreinterpret_bf16_f64_x4 (z4), -+ svreinterpret_bf16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c -index 60705e628..9b6f8227d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f16_u64_tied1, svfloat16_t, svuint64_t, - TEST_DUAL_Z (reinterpret_f16_u64_untied, svfloat16_t, svuint64_t, - z0 = svreinterpret_f16_u64 (z4), - z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_bf16_x2_tied1, svfloat16x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_f16_bf16_x2 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f16_f32_x2_untied, svfloat16x2_t, svfloat32x2_t, z0, -+ svreinterpret_f16_f32_x2 (z4), -+ svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_s64_x3_tied1, svfloat16x3_t, svint64x3_t, -+ z0_res = svreinterpret_f16_s64_x3 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d 
-+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f16_u8_x3_untied, svfloat16x3_t, svuint8x3_t, z18, -+ svreinterpret_f16_u8_x3 (z23), -+ svreinterpret_f16 (z23)) -+ -+/* -+** reinterpret_f16_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_u32_x4_tied1, svfloat16x4_t, svuint32x4_t, -+ z0_res = svreinterpret_f16_u32_x4 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f16_f64_x4_untied, svfloat16x4_t, svfloat64x4_t, z28, -+ svreinterpret_f16_f64_x4 (z4), -+ svreinterpret_f16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c -index 06fc46f25..ce981fce9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f32_u64_tied1, svfloat32_t, svuint64_t, - TEST_DUAL_Z (reinterpret_f32_u64_untied, svfloat32_t, svuint64_t, - z0 = svreinterpret_f32_u64 (z4), - z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_bf16_x2_tied1, svfloat32x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_f32_bf16_x2 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f32_f32_x2_untied, svfloat32x2_t, svfloat32x2_t, z0, -+ svreinterpret_f32_f32_x2 (z4), -+ svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_s64_x3_tied1, svfloat32x3_t, svint64x3_t, -+ z0_res = svreinterpret_f32_s64_x3 (z0), -+ z0_res = 
svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f32_u8_x3_untied, svfloat32x3_t, svuint8x3_t, z18, -+ svreinterpret_f32_u8_x3 (z23), -+ svreinterpret_f32 (z23)) -+ -+/* -+** reinterpret_f32_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_u32_x4_tied1, svfloat32x4_t, svuint32x4_t, -+ z0_res = svreinterpret_f32_u32_x4 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f32_f64_x4_untied, svfloat32x4_t, svfloat64x4_t, z28, -+ svreinterpret_f32_f64_x4 (z4), -+ svreinterpret_f32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c -index 003ee3fe2..4f51824ab 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_f64_u64_tied1, svfloat64_t, svuint64_t, - TEST_DUAL_Z (reinterpret_f64_u64_untied, svfloat64_t, svuint64_t, - z0 = svreinterpret_f64_u64 (z4), - z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_bf16_x2_tied1, svfloat64x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_f64_bf16_x2 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f64_f32_x2_untied, svfloat64x2_t, svfloat32x2_t, z0, -+ svreinterpret_f64_f32_x2 (z4), -+ 
svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_s64_x3_tied1, svfloat64x3_t, svint64x3_t, -+ z0_res = svreinterpret_f64_s64_x3 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f64_u8_x3_untied, svfloat64x3_t, svuint8x3_t, z18, -+ svreinterpret_f64_u8_x3 (z23), -+ svreinterpret_f64 (z23)) -+ -+/* -+** reinterpret_f64_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_u32_x4_tied1, svfloat64x4_t, svuint32x4_t, -+ z0_res = svreinterpret_f64_u32_x4 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_f64_f64_x4_untied, svfloat64x4_t, svfloat64x4_t, z28, -+ svreinterpret_f64_f64_x4 (z4), -+ svreinterpret_f64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c -index d62817c2c..7e15f3e9b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s16_u64_tied1, svint16_t, svuint64_t, - TEST_DUAL_Z (reinterpret_s16_u64_untied, svint16_t, svuint64_t, - z0 = svreinterpret_s16_u64 (z4), - z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_bf16_x2_tied1, svint16x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_s16_bf16_x2 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d 
-+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s16_f32_x2_untied, svint16x2_t, svfloat32x2_t, z0, -+ svreinterpret_s16_f32_x2 (z4), -+ svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_s64_x3_tied1, svint16x3_t, svint64x3_t, -+ z0_res = svreinterpret_s16_s64_x3 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s16_u8_x3_untied, svint16x3_t, svuint8x3_t, z18, -+ svreinterpret_s16_u8_x3 (z23), -+ svreinterpret_s16 (z23)) -+ -+/* -+** reinterpret_s16_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_u32_x4_tied1, svint16x4_t, svuint32x4_t, -+ z0_res = svreinterpret_s16_u32_x4 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s16_f64_x4_untied, svint16x4_t, svfloat64x4_t, z28, -+ svreinterpret_s16_f64_x4 (z4), -+ svreinterpret_s16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c -index e1068f244..60da8aef3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s32_u64_tied1, svint32_t, svuint64_t, - TEST_DUAL_Z (reinterpret_s32_u64_untied, svint32_t, svuint64_t, - z0 = svreinterpret_s32_u64 (z4), - z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_bf16_x2_tied1, svint32x2_t, 
svbfloat16x2_t, -+ z0_res = svreinterpret_s32_bf16_x2 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s32_f32_x2_untied, svint32x2_t, svfloat32x2_t, z0, -+ svreinterpret_s32_f32_x2 (z4), -+ svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_s64_x3_tied1, svint32x3_t, svint64x3_t, -+ z0_res = svreinterpret_s32_s64_x3 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s32_u8_x3_untied, svint32x3_t, svuint8x3_t, z18, -+ svreinterpret_s32_u8_x3 (z23), -+ svreinterpret_s32 (z23)) -+ -+/* -+** reinterpret_s32_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_u32_x4_tied1, svint32x4_t, svuint32x4_t, -+ z0_res = svreinterpret_s32_u32_x4 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s32_f64_x4_untied, svint32x4_t, svfloat64x4_t, z28, -+ svreinterpret_s32_f64_x4 (z4), -+ svreinterpret_s32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c -index cada7533c..d705c60df 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s64_u64_tied1, svint64_t, svuint64_t, - TEST_DUAL_Z (reinterpret_s64_u64_untied, svint64_t, svuint64_t, - z0 = 
svreinterpret_s64_u64 (z4), - z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_bf16_x2_tied1, svint64x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_s64_bf16_x2 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s64_f32_x2_untied, svint64x2_t, svfloat32x2_t, z0, -+ svreinterpret_s64_f32_x2 (z4), -+ svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_s64_x3_tied1, svint64x3_t, svint64x3_t, -+ z0_res = svreinterpret_s64_s64_x3 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s64_u8_x3_untied, svint64x3_t, svuint8x3_t, z18, -+ svreinterpret_s64_u8_x3 (z23), -+ svreinterpret_s64 (z23)) -+ -+/* -+** reinterpret_s64_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_u32_x4_tied1, svint64x4_t, svuint32x4_t, -+ z0_res = svreinterpret_s64_u32_x4 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s64_f64_x4_untied, svint64x4_t, svfloat64x4_t, z28, -+ svreinterpret_s64_f64_x4 (z4), -+ svreinterpret_s64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c -index 23a40d0ba..ab90a54d7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_s8_u64_tied1, svint8_t, svuint64_t, - TEST_DUAL_Z (reinterpret_s8_u64_untied, svint8_t, svuint64_t, - z0 = svreinterpret_s8_u64 (z4), - z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_bf16_x2_tied1, svint8x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_s8_bf16_x2 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s8_f32_x2_untied, svint8x2_t, svfloat32x2_t, z0, -+ svreinterpret_s8_f32_x2 (z4), -+ svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_s64_x3_tied1, svint8x3_t, svint64x3_t, -+ z0_res = svreinterpret_s8_s64_x3 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s8_u8_x3_untied, svint8x3_t, svuint8x3_t, z18, -+ svreinterpret_s8_u8_x3 (z23), -+ svreinterpret_s8 (z23)) -+ -+/* -+** reinterpret_s8_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_u32_x4_tied1, svint8x4_t, svuint32x4_t, -+ z0_res = svreinterpret_s8_u32_x4 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_s8_f64_x4_untied, svint8x4_t, svfloat64x4_t, z28, -+ svreinterpret_s8_f64_x4 (z4), -+ svreinterpret_s8 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c -index 48e8ecaff..fcfc0eb9d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u16_u64_tied1, svuint16_t, svuint64_t, - TEST_DUAL_Z (reinterpret_u16_u64_untied, svuint16_t, svuint64_t, - z0 = svreinterpret_u16_u64 (z4), - z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_bf16_x2_tied1, svuint16x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_u16_bf16_x2 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u16_f32_x2_untied, svuint16x2_t, svfloat32x2_t, z0, -+ svreinterpret_u16_f32_x2 (z4), -+ svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_s64_x3_tied1, svuint16x3_t, svint64x3_t, -+ z0_res = svreinterpret_u16_s64_x3 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u16_u8_x3_untied, svuint16x3_t, svuint8x3_t, z18, -+ svreinterpret_u16_u8_x3 (z23), -+ svreinterpret_u16 (z23)) -+ -+/* -+** reinterpret_u16_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_u32_x4_tied1, svuint16x4_t, svuint32x4_t, -+ z0_res = svreinterpret_u16_u32_x4 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN 
(reinterpret_u16_f64_x4_untied, svuint16x4_t, svfloat64x4_t, z28, -+ svreinterpret_u16_f64_x4 (z4), -+ svreinterpret_u16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c -index 1d4e85712..6d7e05857 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u32_u64_tied1, svuint32_t, svuint64_t, - TEST_DUAL_Z (reinterpret_u32_u64_untied, svuint32_t, svuint64_t, - z0 = svreinterpret_u32_u64 (z4), - z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_bf16_x2_tied1, svuint32x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_u32_bf16_x2 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u32_f32_x2_untied, svuint32x2_t, svfloat32x2_t, z0, -+ svreinterpret_u32_f32_x2 (z4), -+ svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_s64_x3_tied1, svuint32x3_t, svint64x3_t, -+ z0_res = svreinterpret_u32_s64_x3 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u32_u8_x3_untied, svuint32x3_t, svuint8x3_t, z18, -+ svreinterpret_u32_u8_x3 (z23), -+ svreinterpret_u32 (z23)) -+ -+/* -+** reinterpret_u32_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_u32_x4_tied1, svuint32x4_t, svuint32x4_t, -+ z0_res = svreinterpret_u32_u32_x4 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_f64_x4_untied: -+** 
mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u32_f64_x4_untied, svuint32x4_t, svfloat64x4_t, z28, -+ svreinterpret_u32_f64_x4 (z4), -+ svreinterpret_u32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c -index 07af69dce..55c0baefb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u64_u64_tied1, svuint64_t, svuint64_t, - TEST_DUAL_Z (reinterpret_u64_u64_untied, svuint64_t, svuint64_t, - z0 = svreinterpret_u64_u64 (z4), - z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_bf16_x2_tied1, svuint64x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_u64_bf16_x2 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u64_f32_x2_untied, svuint64x2_t, svfloat32x2_t, z0, -+ svreinterpret_u64_f32_x2 (z4), -+ svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_s64_x3_tied1, svuint64x3_t, svint64x3_t, -+ z0_res = svreinterpret_u64_s64_x3 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u64_u8_x3_untied, svuint64x3_t, svuint8x3_t, z18, -+ svreinterpret_u64_u8_x3 (z23), -+ svreinterpret_u64 (z23)) -+ -+/* -+** reinterpret_u64_u32_x4_tied1: -+** ret -+*/ 
-+TEST_DUAL_Z_REV (reinterpret_u64_u32_x4_tied1, svuint64x4_t, svuint32x4_t, -+ z0_res = svreinterpret_u64_u32_x4 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u64_f64_x4_untied, svuint64x4_t, svfloat64x4_t, z28, -+ svreinterpret_u64_f64_x4 (z4), -+ svreinterpret_u64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c -index a4c7f4c8d..f73021961 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c -@@ -205,3 +205,65 @@ TEST_DUAL_Z_REV (reinterpret_u8_u64_tied1, svuint8_t, svuint64_t, - TEST_DUAL_Z (reinterpret_u8_u64_untied, svuint8_t, svuint64_t, - z0 = svreinterpret_u8_u64 (z4), - z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_bf16_x2_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_bf16_x2_tied1, svuint8x2_t, svbfloat16x2_t, -+ z0_res = svreinterpret_u8_bf16_x2 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_f32_x2_untied: -+** ( -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** | -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ) -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u8_f32_x2_untied, svuint8x2_t, svfloat32x2_t, z0, -+ svreinterpret_u8_f32_x2 (z4), -+ svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_s64_x3_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_s64_x3_tied1, svuint8x3_t, svint64x3_t, -+ z0_res = svreinterpret_u8_s64_x3 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_u8_x3_untied: -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** mov (z18|z19|z20)\.d, (z23|z24|z25)\.d -+** ret -+*/ -+TEST_DUAL_XN 
(reinterpret_u8_u8_x3_untied, svuint8x3_t, svuint8x3_t, z18, -+ svreinterpret_u8_u8_x3 (z23), -+ svreinterpret_u8 (z23)) -+ -+/* -+** reinterpret_u8_u32_x4_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_u32_x4_tied1, svuint8x4_t, svuint32x4_t, -+ z0_res = svreinterpret_u8_u32_x4 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_f64_x4_untied: -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** mov (z28|z29|z30|z31)\.d, z[4-7]\.d -+** ret -+*/ -+TEST_DUAL_XN (reinterpret_u8_f64_x4_untied, svuint8x4_t, svfloat64x4_t, z28, -+ svreinterpret_u8_f64_x4 (z4), -+ svreinterpret_u8 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -index fbf392b3e..2da61ff5c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -@@ -421,4 +421,18 @@ - return z0_res; \ - } - -+#define TEST_DUAL_XN(NAME, TTYPE1, TTYPE2, RES, CODE1, CODE2) \ -+ PROTO (NAME, void, ()) \ -+ { \ -+ register TTYPE1 z0 __asm ("z0"); \ -+ register TTYPE2 z4 __asm ("z4"); \ -+ register TTYPE1 z18 __asm ("z18"); \ -+ register TTYPE2 z23 __asm ("z23"); \ -+ register TTYPE1 z28 __asm ("z28"); \ -+ __asm volatile ("" : "=w" (z0), "=w" (z4), "=w" (z18), \ -+ "=w" (z23), "=w" (z28)); \ -+ INVOKE (RES = CODE1, RES = CODE2); \ -+ __asm volatile ("" :: "w" (RES)); \ -+ } -+ - #endif --- -2.33.0 - diff --git a/0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch b/0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch deleted file mode 100644 index ecd15bd..0000000 --- a/0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Mon, 27 Nov 2023 13:38:16 +0000 -Subject: [PATCH 072/157] 
[Backport][SME] attribs: Use existing traits for - excl_hash_traits - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31 - -excl_hash_traits can be defined more simply by reusing existing traits. - -gcc/ - * attribs.cc (excl_hash_traits): Delete. - (test_attribute_exclusions): Use pair_hash and nofree_string_hash - instead. ---- - gcc/attribs.cc | 45 +++------------------------------------------ - 1 file changed, 3 insertions(+), 42 deletions(-) - -diff --git a/gcc/attribs.cc b/gcc/attribs.cc -index b219f8780..16d05b1da 100644 ---- a/gcc/attribs.cc -+++ b/gcc/attribs.cc -@@ -2555,47 +2555,6 @@ namespace selftest - - typedef std::pair excl_pair; - --struct excl_hash_traits: typed_noop_remove --{ -- typedef excl_pair value_type; -- typedef value_type compare_type; -- -- static hashval_t hash (const value_type &x) -- { -- hashval_t h1 = htab_hash_string (x.first); -- hashval_t h2 = htab_hash_string (x.second); -- return h1 ^ h2; -- } -- -- static bool equal (const value_type &x, const value_type &y) -- { -- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second); -- } -- -- static void mark_deleted (value_type &x) -- { -- x = value_type (NULL, NULL); -- } -- -- static const bool empty_zero_p = false; -- -- static void mark_empty (value_type &x) -- { -- x = value_type ("", ""); -- } -- -- static bool is_deleted (const value_type &x) -- { -- return !x.first && !x.second; -- } -- -- static bool is_empty (const value_type &x) -- { -- return !*x.first && !*x.second; -- } --}; -- -- - /* Self-test to verify that each attribute exclusion is symmetric, - meaning that if attribute A is encoded as incompatible with - attribute B then the opposite relationship is also encoded. 
-@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove - static void - test_attribute_exclusions () - { -+ using excl_hash_traits = pair_hash; -+ - /* Iterate over the array of attribute tables first (with TI0 as - the index) and over the array of attribute_spec in each table - (with SI0 as the index). */ - const size_t ntables = ARRAY_SIZE (attribute_tables); - - /* Set of pairs of mutually exclusive attributes. */ -- typedef hash_set exclusion_set; -+ typedef hash_set exclusion_set; - exclusion_set excl_set; - - for (size_t ti0 = 0; ti0 != ntables; ++ti0) --- -2.33.0 - diff --git a/0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch b/0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch deleted file mode 100644 index 5f5b8f3..0000000 --- a/0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch +++ /dev/null @@ -1,2369 +0,0 @@ -From 82d654912e3671055034e789a8f7110f6d87d447 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 2 Dec 2023 13:49:52 +0000 -Subject: [PATCH 073/157] [Backport][SME] Allow target attributes in non-gnu - namespaces - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fa24687aa3a683fd105ce5ff6b176f48dca3b6c - -Currently there are four static sources of attributes: - -- LANG_HOOKS_ATTRIBUTE_TABLE -- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE -- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE -- TARGET_ATTRIBUTE_TABLE - -All of the attributes in these tables go in the "gnu" namespace. -This means that they can use the traditional GNU __attribute__((...)) -syntax and the standard [[gnu::...]] syntax. - -Standard attributes are registered dynamically with a null namespace. -There are no supported attributes in other namespaces (clang, vendor -namespaces, etc.). - -This patch tries to generalise things by making the namespace -part of the attribute specification. 
- -It's usual for multiple attributes to be defined in the same namespace, -so rather than adding the namespace to each individual definition, -it seemed better to group attributes in the same namespace together. -This would also allow us to reuse the same table for clang attributes -that are written with the GNU syntax, or other similar situations -where the attribute can be accessed via multiple "spellings". - -The patch therefore adds a scoped_attribute_specs that contains -a namespace and a list of attributes in that namespace. - -It's still possible to have multiple scoped_attribute_specs -for the same namespace. E.g. it makes sense to keep the -C++-specific, C/C++-common, and format-related attributes in -separate tables, even though they're all GNU attributes. - -Current lists of attributes are terminated by a null name. -Rather than keep that for the new structure, it seemed neater -to use an array_slice. This also makes the tables slighly more -compact. - -In general, a target might want to support attributes in multiple -namespaces. Rather than have a separate hook for each possibility -(like the three langhooks above), it seemed better to make -TARGET_ATTRIBUTE_TABLE a table of tables. Specifically, it's -an array_slice of scoped_attribute_specs. - -We can do the same thing for langhooks, which allows the three hooks -above to be merged into a single LANG_HOOKS_ATTRIBUTE_TABLE. -It also allows the standard attributes to be registered statically -and checked by the usual attribs.cc checks. - -The patch adds a TARGET_GNU_ATTRIBUTES helper for the common case -in which a target wants a single table of gnu attributes. It can -only be used if the table is free of preprocessor directives. - -There are probably other things we need to do to make vendor namespaces -work smoothly. E.g. in principle it would be good to make exclusion -sets namespace-aware. But to some extent we have that with standard -vs. gnu attributes too. 
This patch is just supposed to be a first step. - -gcc/ - * attribs.h (scoped_attribute_specs): New structure. - (register_scoped_attributes): Take a reference to a - scoped_attribute_specs instead of separate namespace and array - parameters. - * plugin.h (register_scoped_attributes): Likewise. - * attribs.cc (register_scoped_attributes): Likewise. - (attribute_tables): Change into an array of scoped_attribute_specs - pointers. Reduce to 1 element for frontends and 1 element for targets. - (empty_attribute_table): Delete. - (check_attribute_tables): Update for changes to attribute_tables. - Use a hash_set to identify duplicates. - (handle_ignored_attributes_option): Update for above changes. - (init_attributes): Likewise. - (excl_pair): Delete. - (test_attribute_exclusions): Update for above changes. Don't - enforce symmetry for standard attributes in the top-level namespace. - * langhooks-def.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Likewise. - (LANG_HOOKS_INITIALIZER): Update accordingly. - (LANG_HOOKS_ATTRIBUTE_TABLE): Define to an empty constructor. - * langhooks.h (lang_hooks::common_attribute_table): Delete. - (lang_hooks::format_attribute_table): Likewise. - (lang_hooks::attribute_table): Redefine to an array of - scoped_attribute_specs pointers. - * target-def.h (TARGET_GNU_ATTRIBUTES): New macro. - * target.def (attribute_spec): Redefine to return an array of - scoped_attribute_specs pointers. - * tree-inline.cc (function_attribute_inlinable_p): Update accordingly. - * doc/tm.texi: Regenerate. - * config/aarch64/aarch64.cc (aarch64_attribute_table): Define using - TARGET_GNU_ATTRIBUTES. - * config/alpha/alpha.cc (vms_attribute_table): Likewise. - * config/avr/avr.cc (avr_attribute_table): Likewise. - * config/bfin/bfin.cc (bfin_attribute_table): Likewise. - * config/bpf/bpf.cc (bpf_attribute_table): Likewise. - * config/csky/csky.cc (csky_attribute_table): Likewise. 
- * config/epiphany/epiphany.cc (epiphany_attribute_table): Likewise. - * config/gcn/gcn.cc (gcn_attribute_table): Likewise. - * config/h8300/h8300.cc (h8300_attribute_table): Likewise. - * config/loongarch/loongarch.cc (loongarch_attribute_table): Likewise. - * config/m32c/m32c.cc (m32c_attribute_table): Likewise. - * config/m32r/m32r.cc (m32r_attribute_table): Likewise. - * config/m68k/m68k.cc (m68k_attribute_table): Likewise. - * config/mcore/mcore.cc (mcore_attribute_table): Likewise. - * config/microblaze/microblaze.cc (microblaze_attribute_table): - Likewise. - * config/mips/mips.cc (mips_attribute_table): Likewise. - * config/msp430/msp430.cc (msp430_attribute_table): Likewise. - * config/nds32/nds32.cc (nds32_attribute_table): Likewise. - * config/nvptx/nvptx.cc (nvptx_attribute_table): Likewise. - * config/riscv/riscv.cc (riscv_attribute_table): Likewise. - * config/rl78/rl78.cc (rl78_attribute_table): Likewise. - * config/rx/rx.cc (rx_attribute_table): Likewise. - * config/s390/s390.cc (s390_attribute_table): Likewise. - * config/sh/sh.cc (sh_attribute_table): Likewise. - * config/sparc/sparc.cc (sparc_attribute_table): Likewise. - * config/stormy16/stormy16.cc (xstormy16_attribute_table): Likewise. - * config/v850/v850.cc (v850_attribute_table): Likewise. - * config/visium/visium.cc (visium_attribute_table): Likewise. - * config/arc/arc.cc (arc_attribute_table): Likewise. Move further - down file. - * config/arm/arm.cc (arm_attribute_table): Update for above changes, - using... - (arm_gnu_attributes, arm_gnu_attribute_table): ...these new globals. - * config/i386/i386-options.h (ix86_attribute_table): Delete. - (ix86_gnu_attribute_table): Declare. - * config/i386/i386-options.cc (ix86_attribute_table): Replace with... - (ix86_gnu_attributes, ix86_gnu_attribute_table): ...these two globals. - * config/i386/i386.cc (ix86_attribute_table): Define as an array of - scoped_attribute_specs pointers. 
- * config/ia64/ia64.cc (ia64_attribute_table): Update for above changes, - using... - (ia64_gnu_attributes, ia64_gnu_attribute_table): ...these new globals. - * config/rs6000/rs6000.cc (rs6000_attribute_table): Update for above - changes, using... - (rs6000_gnu_attributes, rs6000_gnu_attribute_table): ...these new - globals. - -gcc/ada/ - * gcc-interface/gigi.h (gnat_internal_attribute_table): Change - type to scoped_attribute_specs. - * gcc-interface/utils.cc (gnat_internal_attribute_table): Likewise, - using... - (gnat_internal_attributes): ...this as the underlying array. - * gcc-interface/misc.cc (gnat_attribute_table): New global. - (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. - -gcc/c-family/ - * c-common.h (c_common_attribute_table): Replace with... - (c_common_gnu_attribute_table): ...this. - (c_common_format_attribute_table): Change type to - scoped_attribute_specs. - * c-attribs.cc (c_common_attribute_table): Replace with... - (c_common_gnu_attributes, c_common_gnu_attribute_table): ...these - new globals. - (c_common_format_attribute_table): Change type to - scoped_attribute_specs, using... - (c_common_format_attributes): ...this as the underlying array. - -gcc/c/ - * c-tree.h (std_attribute_table): Declare. - * c-decl.cc (std_attribute_table): Change type to - scoped_attribute_specs, using... - (std_attributes): ...this as the underlying array. - (c_init_decl_processing): Remove call to register_scoped_attributes. - * c-objc-common.h (c_objc_attribute_table): New global. - (LANG_HOOKS_ATTRIBUTE_TABLE): Use it. - (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. - -gcc/cp/ - * cp-tree.h (cxx_attribute_table): Delete. - (cxx_gnu_attribute_table, std_attribute_table): Declare. - * cp-objcp-common.h (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. - (cp_objcp_attribute_table): New table. - (LANG_HOOKS_ATTRIBUTE_TABLE): Redefine. - * tree.cc (cxx_attribute_table): Replace with... 
- (cxx_gnu_attributes, cxx_gnu_attribute_table): ...these globals. - (std_attribute_table): Change type to scoped_attribute_specs, using... - (std_attributes): ...this as the underlying array. - (init_tree): Remove call to register_scoped_attributes. - -gcc/d/ - * d-tree.h (d_langhook_attribute_table): Replace with... - (d_langhook_gnu_attribute_table): ...this. - (d_langhook_common_attribute_table): Change type to - scoped_attribute_specs. - * d-attribs.cc (d_langhook_common_attribute_table): Change type to - scoped_attribute_specs, using... - (d_langhook_common_attributes): ...this as the underlying array. - (d_langhook_attribute_table): Replace with... - (d_langhook_gnu_attributes, d_langhook_gnu_attribute_table): ...these - new globals. - (uda_attribute_p): Update accordingly, and update for new - targetm.attribute_table type. - * d-lang.cc (d_langhook_attribute_table): New global. - (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - -gcc/fortran/ - * f95-lang.cc: Include attribs.h. - (gfc_attribute_table): Change to an array of scoped_attribute_specs - pointers, using... - (gfc_gnu_attributes, gfc_gnu_attribute_table): ...these new globals. - -gcc/jit/ - * dummy-frontend.cc (jit_format_attribute_table): Change type to - scoped_attribute_specs, using... - (jit_format_attributes): ...this as the underlying array. - (jit_attribute_table): Change to an array of scoped_attribute_specs - pointers, using... - (jit_gnu_attributes, jit_gnu_attribute_table): ...these new globals - for the original array. Include the format attributes. - (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_ATTRIBUTE_TABLE): Define. - -gcc/lto/ - * lto-lang.cc (lto_format_attribute_table): Change type to - scoped_attribute_specs, using... - (lto_format_attributes): ...this as the underlying array. - (lto_attribute_table): Change to an array of scoped_attribute_specs - pointers, using... 
- (lto_gnu_attributes, lto_gnu_attribute_table): ...these new globals - for the original array. Include the format attributes. - (LANG_HOOKS_COMMON_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE): Delete. - (LANG_HOOKS_ATTRIBUTE_TABLE): Define. ---- - gcc/ada/gcc-interface/gigi.h | 2 +- - gcc/ada/gcc-interface/misc.cc | 7 +- - gcc/ada/gcc-interface/utils.cc | 8 +- - gcc/attribs.cc | 221 ++++++++++++---------------- - gcc/attribs.h | 12 +- - gcc/c-family/c-attribs.cc | 20 ++- - gcc/c-family/c-common.h | 4 +- - gcc/c/c-decl.cc | 12 +- - gcc/c/c-objc-common.h | 14 +- - gcc/c/c-tree.h | 2 + - gcc/config/aarch64/aarch64.cc | 7 +- - gcc/config/alpha/alpha.cc | 7 +- - gcc/config/arc/arc.cc | 74 +++++----- - gcc/config/arm/arm.cc | 15 +- - gcc/config/avr/avr.cc | 7 +- - gcc/config/bfin/bfin.cc | 7 +- - gcc/config/bpf/bpf.cc | 9 +- - gcc/config/csky/csky.cc | 7 +- - gcc/config/epiphany/epiphany.cc | 7 +- - gcc/config/gcn/gcn.cc | 8 +- - gcc/config/h8300/h8300.cc | 7 +- - gcc/config/i386/i386-options.cc | 10 +- - gcc/config/i386/i386-options.h | 2 +- - gcc/config/i386/i386.cc | 5 + - gcc/config/ia64/ia64.cc | 15 +- - gcc/config/m32c/m32c.cc | 7 +- - gcc/config/m32r/m32r.cc | 7 +- - gcc/config/m68k/m68k.cc | 7 +- - gcc/config/mcore/mcore.cc | 7 +- - gcc/config/microblaze/microblaze.cc | 7 +- - gcc/config/mips/mips.cc | 7 +- - gcc/config/msp430/msp430.cc | 8 +- - gcc/config/nds32/nds32.cc | 9 +- - gcc/config/nvptx/nvptx.cc | 7 +- - gcc/config/riscv/riscv.cc | 9 +- - gcc/config/rl78/rl78.cc | 7 +- - gcc/config/rs6000/rs6000.cc | 13 +- - gcc/config/rx/rx.cc | 7 +- - gcc/config/s390/s390.cc | 9 +- - gcc/config/sh/sh.cc | 7 +- - gcc/config/sparc/sparc.cc | 7 +- - gcc/config/stormy16/stormy16.cc | 7 +- - gcc/config/v850/v850.cc | 7 +- - gcc/config/visium/visium.cc | 7 +- - gcc/cp/cp-objcp-common.h | 15 +- - gcc/cp/cp-tree.h | 3 +- - gcc/cp/tree.cc | 16 +- - gcc/d/d-attribs.cc | 35 ++--- - gcc/d/d-lang.cc | 8 +- - gcc/d/d-tree.h | 4 +- - gcc/doc/tm.texi | 33 ++++- - 
gcc/fortran/f95-lang.cc | 14 +- - gcc/jit/dummy-frontend.cc | 32 ++-- - gcc/langhooks-def.h | 6 +- - gcc/langhooks.h | 4 +- - gcc/lto/lto-lang.cc | 30 ++-- - gcc/plugin.h | 3 +- - gcc/target-def.h | 14 ++ - gcc/target.def | 35 ++++- - gcc/tree-inline.cc | 7 +- - 60 files changed, 491 insertions(+), 403 deletions(-) - -diff --git a/gcc/ada/gcc-interface/gigi.h b/gcc/ada/gcc-interface/gigi.h -index bd559d176..6ababfcbb 100644 ---- a/gcc/ada/gcc-interface/gigi.h -+++ b/gcc/ada/gcc-interface/gigi.h -@@ -349,7 +349,7 @@ struct attrib - }; - - /* Table of machine-independent internal attributes. */ --extern const struct attribute_spec gnat_internal_attribute_table[]; -+extern const struct scoped_attribute_specs gnat_internal_attribute_table; - - /* Define the entries in the standard data array. */ - enum standard_datatypes -diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc -index 2caa83ff8..8dd055772 100644 ---- a/gcc/ada/gcc-interface/misc.cc -+++ b/gcc/ada/gcc-interface/misc.cc -@@ -1339,6 +1339,11 @@ get_lang_specific (tree node) - return TYPE_LANG_SPECIFIC (node); - } - -+const struct scoped_attribute_specs *const gnat_attribute_table[] = -+{ -+ &gnat_internal_attribute_table -+}; -+ - /* Definitions for our language-specific hooks. 
*/ - - #undef LANG_HOOKS_NAME -@@ -1404,7 +1409,7 @@ get_lang_specific (tree node) - #undef LANG_HOOKS_GET_FIXED_POINT_TYPE_INFO - #define LANG_HOOKS_GET_FIXED_POINT_TYPE_INFO gnat_get_fixed_point_type_info - #undef LANG_HOOKS_ATTRIBUTE_TABLE --#define LANG_HOOKS_ATTRIBUTE_TABLE gnat_internal_attribute_table -+#define LANG_HOOKS_ATTRIBUTE_TABLE gnat_attribute_table - #undef LANG_HOOKS_BUILTIN_FUNCTION - #define LANG_HOOKS_BUILTIN_FUNCTION gnat_builtin_function - #undef LANG_HOOKS_INIT_TS -diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc -index 049cf74eb..ef8524fa9 100644 ---- a/gcc/ada/gcc-interface/utils.cc -+++ b/gcc/ada/gcc-interface/utils.cc -@@ -134,7 +134,7 @@ static tree fake_attribute_handler (tree *, tree, tree, int, bool *); - - /* Table of machine-independent internal attributes for Ada. We support - this minimal set of attributes to accommodate the needs of builtins. */ --const struct attribute_spec gnat_internal_attribute_table[] = -+static const attribute_spec gnat_internal_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -207,9 +207,11 @@ const struct attribute_spec gnat_internal_attribute_table[] = - fake_attribute_handler, NULL }, - { "format_arg", 1, 1, false, true, true, false, - fake_attribute_handler, NULL }, -+}; - -- { NULL, 0, 0, false, false, false, false, -- NULL, NULL } -+const scoped_attribute_specs gnat_internal_attribute_table = -+{ -+ "gnu", gnat_internal_attributes - }; - - /* Associates a GNAT tree node to a GCC tree node. It is used in -diff --git a/gcc/attribs.cc b/gcc/attribs.cc -index 16d05b1da..656ea739e 100644 ---- a/gcc/attribs.cc -+++ b/gcc/attribs.cc -@@ -39,7 +39,7 @@ along with GCC; see the file COPYING3. If not see - - /* Table of the tables of attributes (common, language, format, machine) - searched. 
*/ --static const struct attribute_spec *attribute_tables[4]; -+static array_slice attribute_tables[2]; - - /* Substring representation. */ - -@@ -102,13 +102,6 @@ static const struct attribute_spec *lookup_scoped_attribute_spec (const_tree, - - static bool attributes_initialized = false; - --/* Default empty table of attributes. */ -- --static const struct attribute_spec empty_attribute_table[] = --{ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -- - /* Return base name of the attribute. Ie '__attr__' is turned into 'attr'. - To avoid need for copying, we simply return length of the string. */ - -@@ -118,21 +111,19 @@ extract_attribute_substring (struct substring *str) - canonicalize_attr_name (str->str, str->length); - } - --/* Insert an array of attributes ATTRIBUTES into a namespace. This -- array must be NULL terminated. NS is the name of attribute -- namespace. IGNORED_P is true iff all unknown attributes in this -- namespace should be ignored for the purposes of -Wattributes. The -- function returns the namespace into which the attributes have been -- registered. */ -+/* Insert SPECS into its namespace. IGNORED_P is true iff all unknown -+ attributes in this namespace should be ignored for the purposes of -+ -Wattributes. The function returns the namespace into which the -+ attributes have been registered. */ - - scoped_attributes * --register_scoped_attributes (const struct attribute_spec *attributes, -- const char *ns, bool ignored_p /*=false*/) -+register_scoped_attributes (const scoped_attribute_specs &specs, -+ bool ignored_p /*=false*/) - { - scoped_attributes *result = NULL; - - /* See if we already have attributes in the namespace NS. 
*/ -- result = find_attribute_namespace (ns); -+ result = find_attribute_namespace (specs.ns); - - if (result == NULL) - { -@@ -143,7 +134,7 @@ register_scoped_attributes (const struct attribute_spec *attributes, - attributes_table.create (64); - - memset (&sa, 0, sizeof (sa)); -- sa.ns = ns; -+ sa.ns = specs.ns; - sa.attributes.create (64); - sa.ignored_p = ignored_p; - result = attributes_table.safe_push (sa); -@@ -153,10 +144,10 @@ register_scoped_attributes (const struct attribute_spec *attributes, - result->ignored_p |= ignored_p; - - /* Really add the attributes to their namespace now. */ -- for (unsigned i = 0; attributes[i].name != NULL; ++i) -+ for (const attribute_spec &attribute : specs.attributes) - { -- result->attributes.safe_push (attributes[i]); -- register_scoped_attribute (&attributes[i], result); -+ result->attributes.safe_push (attribute); -+ register_scoped_attribute (&attribute, result); - } - - gcc_assert (result != NULL); -@@ -183,49 +174,40 @@ find_attribute_namespace (const char* ns) - static void - check_attribute_tables (void) - { -- for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++) -- for (size_t j = 0; attribute_tables[i][j].name != NULL; j++) -- { -- /* The name must not begin and end with __. */ -- const char *name = attribute_tables[i][j].name; -- int len = strlen (name); -+ hash_set> names; - -- gcc_assert (!(name[0] == '_' && name[1] == '_' -- && name[len - 1] == '_' && name[len - 2] == '_')); -+ for (auto scoped_array : attribute_tables) -+ for (auto scoped_attributes : scoped_array) -+ for (const attribute_spec &attribute : scoped_attributes->attributes) -+ { -+ /* The name must not begin and end with __. */ -+ const char *name = attribute.name; -+ int len = strlen (name); -+ -+ gcc_assert (!(name[0] == '_' && name[1] == '_' -+ && name[len - 1] == '_' && name[len - 2] == '_')); - -- /* The minimum and maximum lengths must be consistent. 
*/ -- gcc_assert (attribute_tables[i][j].min_length >= 0); -+ /* The minimum and maximum lengths must be consistent. */ -+ gcc_assert (attribute.min_length >= 0); - -- gcc_assert (attribute_tables[i][j].max_length == -1 -- || (attribute_tables[i][j].max_length -- >= attribute_tables[i][j].min_length)); -+ gcc_assert (attribute.max_length == -1 -+ || attribute.max_length >= attribute.min_length); - -- /* An attribute cannot require both a DECL and a TYPE. */ -- gcc_assert (!attribute_tables[i][j].decl_required -- || !attribute_tables[i][j].type_required); -+ /* An attribute cannot require both a DECL and a TYPE. */ -+ gcc_assert (!attribute.decl_required -+ || !attribute.type_required); - - /* If an attribute requires a function type, in particular - it requires a type. */ -- gcc_assert (!attribute_tables[i][j].function_type_required -- || attribute_tables[i][j].type_required); -- } -- -- /* Check that each name occurs just once in each table. */ -- for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++) -- for (size_t j = 0; attribute_tables[i][j].name != NULL; j++) -- for (size_t k = j + 1; attribute_tables[i][k].name != NULL; k++) -- gcc_assert (strcmp (attribute_tables[i][j].name, -- attribute_tables[i][k].name)); -- -- /* Check that no name occurs in more than one table. Names that -- begin with '*' are exempt, and may be overridden. */ -- for (size_t i = 0; i < ARRAY_SIZE (attribute_tables); i++) -- for (size_t j = i + 1; j < ARRAY_SIZE (attribute_tables); j++) -- for (size_t k = 0; attribute_tables[i][k].name != NULL; k++) -- for (size_t l = 0; attribute_tables[j][l].name != NULL; l++) -- gcc_assert (attribute_tables[i][k].name[0] == '*' -- || strcmp (attribute_tables[i][k].name, -- attribute_tables[j][l].name)); -+ gcc_assert (!attribute.function_type_required -+ || attribute.type_required); -+ -+ /* Check that no name occurs more than once. Names that -+ begin with '*' are exempt, and may be overridden. 
*/ -+ const char *ns = scoped_attributes->ns; -+ if (name[0] != '*' && names.add ({ ns ? ns : "", name })) -+ gcc_unreachable (); -+ } - } - - /* Used to stash pointers to allocated memory so that we can free them at -@@ -280,7 +262,7 @@ handle_ignored_attributes_option (vec *v) - canonicalize_attr_name (vendor_start, vendor_len); - /* We perform all this hijinks so that we don't have to copy OPT. */ - tree vendor_id = get_identifier_with_length (vendor_start, vendor_len); -- const char *attr; -+ array_slice attrs; - /* In the "vendor::" case, we should ignore *any* attribute coming - from this attribute namespace. */ - if (attr_len > 0) -@@ -292,22 +274,23 @@ handle_ignored_attributes_option (vec *v) - } - canonicalize_attr_name (attr_start, attr_len); - tree attr_id = get_identifier_with_length (attr_start, attr_len); -- attr = IDENTIFIER_POINTER (attr_id); -+ const char *attr = IDENTIFIER_POINTER (attr_id); - /* If we've already seen this vendor::attr, ignore it. Attempting to - register it twice would lead to a crash. */ - if (lookup_scoped_attribute_spec (vendor_id, attr_id)) - continue; -+ /* Create a table with extra attributes which we will register. -+ We can't free it here, so squirrel away the pointers. */ -+ attribute_spec *table = new attribute_spec { -+ attr, 0, -2, false, false, false, false, nullptr, nullptr -+ }; -+ ignored_attributes_table.safe_push (table); -+ attrs = { table, 1 }; - } -- else -- attr = nullptr; -- /* Create a table with extra attributes which we will register. -- We can't free it here, so squirrel away the pointers. 
*/ -- attribute_spec *table = new attribute_spec[2]; -- ignored_attributes_table.safe_push (table); -- table[0] = { attr, 0, -2, false, false, false, false, nullptr, nullptr }; -- table[1] = { nullptr, 0, 0, false, false, false, false, nullptr, -- nullptr }; -- register_scoped_attributes (table, IDENTIFIER_POINTER (vendor_id), !attr); -+ const scoped_attribute_specs scoped_specs = { -+ IDENTIFIER_POINTER (vendor_id), attrs -+ }; -+ register_scoped_attributes (scoped_specs, attrs.empty ()); - } - } - -@@ -327,27 +310,18 @@ free_attr_data () - void - init_attributes (void) - { -- size_t i; -- - if (attributes_initialized) - return; - -- attribute_tables[0] = lang_hooks.common_attribute_table; -- attribute_tables[1] = lang_hooks.attribute_table; -- attribute_tables[2] = lang_hooks.format_attribute_table; -- attribute_tables[3] = targetm.attribute_table; -- -- /* Translate NULL pointers to pointers to the empty table. */ -- for (i = 0; i < ARRAY_SIZE (attribute_tables); i++) -- if (attribute_tables[i] == NULL) -- attribute_tables[i] = empty_attribute_table; -+ attribute_tables[0] = lang_hooks.attribute_table; -+ attribute_tables[1] = targetm.attribute_table; - - if (flag_checking) - check_attribute_tables (); - -- for (i = 0; i < ARRAY_SIZE (attribute_tables); ++i) -- /* Put all the GNU attributes into the "gnu" namespace. */ -- register_scoped_attributes (attribute_tables[i], "gnu"); -+ for (auto scoped_array : attribute_tables) -+ for (auto scoped_attributes : scoped_array) -+ register_scoped_attributes (*scoped_attributes); - - vec *ignored = (vec *) flag_ignored_attributes; - handle_ignored_attributes_option (ignored); -@@ -2551,10 +2525,6 @@ attr_access::array_as_string (tree type) const - namespace selftest - { - --/* Helper types to verify the consistency attribute exclusions. 
*/ -- --typedef std::pair excl_pair; -- - /* Self-test to verify that each attribute exclusion is symmetric, - meaning that if attribute A is encoded as incompatible with - attribute B then the opposite relationship is also encoded. -@@ -2569,55 +2539,54 @@ test_attribute_exclusions () - /* Iterate over the array of attribute tables first (with TI0 as - the index) and over the array of attribute_spec in each table - (with SI0 as the index). */ -- const size_t ntables = ARRAY_SIZE (attribute_tables); -+ hash_set excl_set; - -- /* Set of pairs of mutually exclusive attributes. */ -- typedef hash_set exclusion_set; -- exclusion_set excl_set; -+ for (auto scoped_array : attribute_tables) -+ for (auto scoped_attributes : scoped_array) -+ for (const attribute_spec &attribute : scoped_attributes->attributes) -+ { -+ const attribute_spec::exclusions *excl = attribute.exclude; - -- for (size_t ti0 = 0; ti0 != ntables; ++ti0) -- for (size_t s0 = 0; attribute_tables[ti0][s0].name; ++s0) -- { -- const attribute_spec::exclusions *excl -- = attribute_tables[ti0][s0].exclude; -+ /* Skip each attribute that doesn't define exclusions. */ -+ if (!excl) -+ continue; - -- /* Skip each attribute that doesn't define exclusions. */ -- if (!excl) -- continue; -+ /* Skip standard (non-GNU) attributes, since currently the -+ exclusions are implicitly for GNU attributes only. -+ Also, C++ likely and unlikely get rewritten to gnu::hot -+ and gnu::cold, so symmetry isn't necessary there. */ -+ if (!scoped_attributes->ns) -+ continue; - -- const char *attr_name = attribute_tables[ti0][s0].name; -+ const char *attr_name = attribute.name; - -- /* Iterate over the set of exclusions for every attribute -- (with EI0 as the index) adding the exclusions defined -- for each to the set. 
*/ -- for (size_t ei0 = 0; excl[ei0].name; ++ei0) -- { -- const char *excl_name = excl[ei0].name; -+ /* Iterate over the set of exclusions for every attribute -+ (with EI0 as the index) adding the exclusions defined -+ for each to the set. */ -+ for (size_t ei0 = 0; excl[ei0].name; ++ei0) -+ { -+ const char *excl_name = excl[ei0].name; - -- if (!strcmp (attr_name, excl_name)) -- continue; -+ if (!strcmp (attr_name, excl_name)) -+ continue; - -- excl_set.add (excl_pair (attr_name, excl_name)); -- } -- } -+ excl_set.add ({ attr_name, excl_name }); -+ } -+ } - - /* Traverse the set of mutually exclusive pairs of attributes - and verify that they are symmetric. */ -- for (exclusion_set::iterator it = excl_set.begin (); -- it != excl_set.end (); -- ++it) -- { -- if (!excl_set.contains (excl_pair ((*it).second, (*it).first))) -- { -- /* An exclusion for an attribute has been found that -- doesn't have a corresponding exclusion in the opposite -- direction. */ -- char desc[120]; -- sprintf (desc, "'%s' attribute exclusion '%s' must be symmetric", -- (*it).first, (*it).second); -- fail (SELFTEST_LOCATION, desc); -- } -- } -+ for (auto excl_pair : excl_set) -+ if (!excl_set.contains ({ excl_pair.second, excl_pair.first })) -+ { -+ /* An exclusion for an attribute has been found that -+ doesn't have a corresponding exclusion in the opposite -+ direction. */ -+ char desc[120]; -+ sprintf (desc, "'%s' attribute exclusion '%s' must be symmetric", -+ excl_pair.first, excl_pair.second); -+ fail (SELFTEST_LOCATION, desc); -+ } - } - - void -diff --git a/gcc/attribs.h b/gcc/attribs.h -index 5b6f63ede..0856f98fb 100644 ---- a/gcc/attribs.h -+++ b/gcc/attribs.h -@@ -20,6 +20,13 @@ along with GCC; see the file COPYING3. If not see - #ifndef GCC_ATTRIBS_H - #define GCC_ATTRIBS_H - -+/* A set of attributes that belong to the same namespace, given by NS. 
*/ -+struct scoped_attribute_specs -+{ -+ const char *ns; -+ array_slice attributes; -+}; -+ - extern const struct attribute_spec *lookup_attribute_spec (const_tree); - extern void free_attr_data (); - extern void init_attributes (void); -@@ -42,9 +49,8 @@ extern tree make_attribute (const char *, const char *, tree); - extern bool attribute_ignored_p (tree); - extern bool attribute_ignored_p (const attribute_spec *const); - --extern struct scoped_attributes* register_scoped_attributes (const struct attribute_spec *, -- const char *, -- bool = false); -+extern struct scoped_attributes * -+ register_scoped_attributes (const scoped_attribute_specs &, bool = false); - - extern char *sorted_attr_string (tree); - extern bool common_function_versions (tree, tree); -diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc -index 111a33f40..d5c0392b7 100644 ---- a/gcc/c-family/c-attribs.cc -+++ b/gcc/c-family/c-attribs.cc -@@ -282,7 +282,7 @@ static const struct attribute_spec::exclusions attr_stack_protect_exclusions[] = - /* Table of machine-independent attributes common to all C-like languages. - - Current list of processed common attributes: nonnull. */ --const struct attribute_spec c_common_attribute_table[] = -+const struct attribute_spec c_common_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -554,23 +554,31 @@ const struct attribute_spec c_common_attribute_table[] = - { "*dealloc", 1, 2, true, false, false, false, - handle_dealloc_attribute, NULL }, - { "tainted_args", 0, 0, true, false, false, false, -- handle_tainted_args_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ handle_tainted_args_attribute, NULL } -+}; -+ -+const struct scoped_attribute_specs c_common_gnu_attribute_table = -+{ -+ "gnu", c_common_gnu_attributes - }; - - /* Give the specifications for the format attributes, used by C and all - descendants. 
- - Current list of processed format attributes: format, format_arg. */ --const struct attribute_spec c_common_format_attribute_table[] = -+const struct attribute_spec c_common_format_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "format", 3, 3, false, true, true, false, - handle_format_attribute, NULL }, - { "format_arg", 1, 1, false, true, true, false, -- handle_format_arg_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ handle_format_arg_attribute, NULL } -+}; -+ -+const struct scoped_attribute_specs c_common_format_attribute_table = -+{ -+ "gnu", c_common_format_attributes - }; - - /* Returns TRUE iff the attribute indicated by ATTR_ID takes a plain -diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h -index 3d5b9c40e..d1503c5a7 100644 ---- a/gcc/c-family/c-common.h -+++ b/gcc/c-family/c-common.h -@@ -819,8 +819,8 @@ enum conversion_safety { - extern struct visibility_flags visibility_options; - - /* Attribute table common to the C front ends. */ --extern const struct attribute_spec c_common_attribute_table[]; --extern const struct attribute_spec c_common_format_attribute_table[]; -+extern const struct scoped_attribute_specs c_common_gnu_attribute_table; -+extern const struct scoped_attribute_specs c_common_format_attribute_table; - - /* Pointer to function to lazily generate the VAR_DECL for __FUNCTION__ etc. - ID is the identifier to use, NAME is the string. -diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc -index 619a20909..9d87a8cdb 100644 ---- a/gcc/c/c-decl.cc -+++ b/gcc/c/c-decl.cc -@@ -4460,7 +4460,7 @@ handle_nodiscard_attribute (tree *node, tree name, tree /*args*/, - return NULL_TREE; - } - /* Table of supported standard (C2x) attributes. 
*/ --const struct attribute_spec std_attribute_table[] = -+static const attribute_spec std_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -4471,8 +4471,12 @@ const struct attribute_spec std_attribute_table[] = - { "maybe_unused", 0, 0, false, false, false, false, - handle_unused_attribute, NULL }, - { "nodiscard", 0, 1, false, false, false, false, -- handle_nodiscard_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ handle_nodiscard_attribute, NULL } -+}; -+ -+const scoped_attribute_specs std_attribute_table = -+{ -+ nullptr, std_attributes - }; - - /* Create the predefined scalar types of C, -@@ -4488,8 +4492,6 @@ c_init_decl_processing (void) - /* Initialize reserved words for parser. */ - c_parse_init (); - -- register_scoped_attributes (std_attribute_table, NULL); -- - current_function_decl = NULL_TREE; - - gcc_obstack_init (&parser_obstack); -diff --git a/gcc/c/c-objc-common.h b/gcc/c/c-objc-common.h -index 0b60df975..bc3dded23 100644 ---- a/gcc/c/c-objc-common.h -+++ b/gcc/c/c-objc-common.h -@@ -70,11 +70,15 @@ along with GCC; see the file COPYING3. If not see - #undef LANG_HOOKS_FINALIZE_EARLY_DEBUG - #define LANG_HOOKS_FINALIZE_EARLY_DEBUG c_common_finalize_early_debug - --/* Attribute hooks. 
*/ --#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE c_common_attribute_table --#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE --#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE c_common_format_attribute_table -+static const scoped_attribute_specs *const c_objc_attribute_table[] = -+{ -+ &std_attribute_table, -+ &c_common_gnu_attribute_table, -+ &c_common_format_attribute_table -+}; -+ -+#undef LANG_HOOKS_ATTRIBUTE_TABLE -+#define LANG_HOOKS_ATTRIBUTE_TABLE c_objc_attribute_table - - #undef LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN - #define LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN c_dump_tree -diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h -index c70f0ba5a..654bd4094 100644 ---- a/gcc/c/c-tree.h -+++ b/gcc/c/c-tree.h -@@ -835,6 +835,8 @@ set_c_expr_source_range (c_expr *expr, - /* In c-fold.cc */ - extern vec incomplete_record_decls; - -+extern const struct scoped_attribute_specs std_attribute_table; -+ - #if CHECKING_P - namespace selftest { - extern void run_c_tests (void); -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 4194dfc70..114252a3c 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2986,7 +2986,7 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, - } - - /* Table of machine attributes. 
*/ --static const struct attribute_spec aarch64_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -2997,9 +2997,8 @@ static const struct attribute_spec aarch64_attribute_table[] = - NULL }, - { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }, - { "SVE type", 3, 3, false, true, false, true, NULL, NULL }, -- { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL } -+}); - - /* An ISA extension in the co-processor and main instruction set space. */ - struct aarch64_option_extension -diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc -index 66c17149d..7fb491918 100644 ---- a/gcc/config/alpha/alpha.cc -+++ b/gcc/config/alpha/alpha.cc -@@ -7475,14 +7475,13 @@ common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED, - return NULL_TREE; - } - --static const struct attribute_spec vms_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (vms_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { COMMON_OBJECT, 0, 1, true, false, false, false, common_object_handler, -- NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ NULL } -+}); - - void - vms_output_aligned_decl_common(FILE *file, tree decl, const char *name, -diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc -index fbc17e684..1c6adcab4 100644 ---- a/gcc/config/arc/arc.cc -+++ b/gcc/config/arc/arc.cc -@@ -230,44 +230,6 @@ static tree arc_handle_secure_attribute (tree *, tree, tree, int, bool *); - static tree arc_handle_uncached_attribute (tree *, tree, tree, int, bool *); - static tree arc_handle_aux_attribute (tree *, tree, tree, int, bool *); - --/* Initialized arc_attribute_table to NULL since 
arc doesnot have any -- machine specific supported attributes. */ --const struct attribute_spec arc_attribute_table[] = --{ -- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, -- affects_type_identity, handler, exclude } */ -- { "interrupt", 1, 1, true, false, false, true, -- arc_handle_interrupt_attribute, NULL }, -- /* Function calls made to this symbol must be done indirectly, because -- it may lie outside of the 21/25 bit addressing range of a normal function -- call. */ -- { "long_call", 0, 0, false, true, true, false, NULL, NULL }, -- /* Whereas these functions are always known to reside within the 25 bit -- addressing range of unconditionalized bl. */ -- { "medium_call", 0, 0, false, true, true, false, NULL, NULL }, -- /* And these functions are always known to reside within the 21 bit -- addressing range of blcc. */ -- { "short_call", 0, 0, false, true, true, false, NULL, NULL }, -- /* Function which are not having the prologue and epilogue generated -- by the compiler. */ -- { "naked", 0, 0, true, false, false, false, arc_handle_fndecl_attribute, -- NULL }, -- /* Functions calls made using jli instruction. The pointer in JLI -- table is found latter. */ -- { "jli_always", 0, 0, false, true, true, false, NULL, NULL }, -- /* Functions calls made using jli instruction. The pointer in JLI -- table is given as input parameter. */ -- { "jli_fixed", 1, 1, false, true, true, false, arc_handle_jli_attribute, -- NULL }, -- /* Call a function using secure-mode. */ -- { "secure_call", 1, 1, false, true, true, false, arc_handle_secure_attribute, -- NULL }, -- /* Bypass caches using .di flag. 
*/ -- { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute, -- NULL }, -- { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; - static int arc_comp_type_attributes (const_tree, const_tree); - static void arc_file_start (void); - static void arc_internal_label (FILE *, const char *, unsigned long); -@@ -819,6 +781,42 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode); - - #include "target-def.h" - -+TARGET_GNU_ATTRIBUTES (arc_attribute_table, -+{ -+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, -+ affects_type_identity, handler, exclude } */ -+ { "interrupt", 1, 1, true, false, false, true, -+ arc_handle_interrupt_attribute, NULL }, -+ /* Function calls made to this symbol must be done indirectly, because -+ it may lie outside of the 21/25 bit addressing range of a normal function -+ call. */ -+ { "long_call", 0, 0, false, true, true, false, NULL, NULL }, -+ /* Whereas these functions are always known to reside within the 25 bit -+ addressing range of unconditionalized bl. */ -+ { "medium_call", 0, 0, false, true, true, false, NULL, NULL }, -+ /* And these functions are always known to reside within the 21 bit -+ addressing range of blcc. */ -+ { "short_call", 0, 0, false, true, true, false, NULL, NULL }, -+ /* Function which are not having the prologue and epilogue generated -+ by the compiler. */ -+ { "naked", 0, 0, true, false, false, false, arc_handle_fndecl_attribute, -+ NULL }, -+ /* Functions calls made using jli instruction. The pointer in JLI -+ table is found latter. */ -+ { "jli_always", 0, 0, false, true, true, false, NULL, NULL }, -+ /* Functions calls made using jli instruction. The pointer in JLI -+ table is given as input parameter. */ -+ { "jli_fixed", 1, 1, false, true, true, false, arc_handle_jli_attribute, -+ NULL }, -+ /* Call a function using secure-mode. 
*/ -+ { "secure_call", 1, 1, false, true, true, false, arc_handle_secure_attribute, -+ NULL }, -+ /* Bypass caches using .di flag. */ -+ { "uncached", 0, 0, false, true, false, false, arc_handle_uncached_attribute, -+ NULL }, -+ { "aux", 0, 1, true, false, false, false, arc_handle_aux_attribute, NULL } -+}); -+ - #undef TARGET_ASM_ALIGNED_HI_OP - #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t" - #undef TARGET_ASM_ALIGNED_SI_OP -diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc -index c72e9c0b0..3bdc7e18e 100644 ---- a/gcc/config/arm/arm.cc -+++ b/gcc/config/arm/arm.cc -@@ -329,7 +329,7 @@ static rtx_insn *thumb1_md_asm_adjust (vec &, vec &, - static const char *arm_identify_fpu_from_isa (sbitmap); - - /* Table of machine attributes. */ --static const struct attribute_spec arm_attribute_table[] = -+static const attribute_spec arm_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -377,8 +377,17 @@ static const struct attribute_spec arm_attribute_table[] = - arm_handle_cmse_nonsecure_entry, NULL }, - { "cmse_nonsecure_call", 0, 0, true, false, false, true, - arm_handle_cmse_nonsecure_call, NULL }, -- { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL } -+}; -+ -+static const scoped_attribute_specs arm_gnu_attribute_table = -+{ -+ "gnu", arm_gnu_attributes -+}; -+ -+static const scoped_attribute_specs *const arm_attribute_table[] = -+{ -+ &arm_gnu_attribute_table - }; - - /* Initialize the GCC target structure. */ -diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc -index 1b5a95410..7b37278ca 100644 ---- a/gcc/config/avr/avr.cc -+++ b/gcc/config/avr/avr.cc -@@ -9723,7 +9723,7 @@ avr_eval_addr_attrib (rtx x) - - - /* AVR attributes. 
*/ --static const struct attribute_spec avr_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (avr_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -9748,9 +9748,8 @@ static const struct attribute_spec avr_attribute_table[] = - { "address", 1, 1, true, false, false, false, - avr_handle_addr_attribute, NULL }, - { "absdata", 0, 0, true, false, false, false, -- avr_handle_absdata_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ avr_handle_absdata_attribute, NULL } -+}); - - - /* Return true if we support address space AS for the architecture in effect -diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc -index b2a9142f5..fbc5c84d1 100644 ---- a/gcc/config/bfin/bfin.cc -+++ b/gcc/config/bfin/bfin.cc -@@ -4895,7 +4895,7 @@ bfin_handle_l2_attribute (tree *node, tree ARG_UNUSED (name), - } - - /* Table of valid machine attributes. */ --static const struct attribute_spec bfin_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (bfin_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -4920,9 +4920,8 @@ static const struct attribute_spec bfin_attribute_table[] = - bfin_handle_l1_data_attribute, NULL }, - { "l1_data_B", 0, 0, true, false, false, false, - bfin_handle_l1_data_attribute, NULL }, -- { "l2", 0, 0, true, false, false, false, bfin_handle_l2_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "l2", 0, 0, true, false, false, false, bfin_handle_l2_attribute, NULL } -+}); - - /* Implementation of TARGET_ASM_INTEGER. 
When using FD-PIC, we need to - tell the assembler to generate pointers to function descriptors in -diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc -index 6a0e3bbca..0343af9c7 100644 ---- a/gcc/config/bpf/bpf.cc -+++ b/gcc/config/bpf/bpf.cc -@@ -146,7 +146,7 @@ bpf_handle_preserve_access_index_attribute (tree *node, tree name, - - /* Target-specific attributes. */ - --static const struct attribute_spec bpf_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (bpf_attribute_table, - { - /* Syntax: { name, min_len, max_len, decl_required, type_required, - function_type_required, affects_type_identity, handler, -@@ -159,11 +159,8 @@ static const struct attribute_spec bpf_attribute_table[] = - /* CO-RE support: attribute to mark that all accesses to the declared - struct/union/array should be recorded. */ - { "preserve_access_index", 0, -1, false, true, false, true, -- bpf_handle_preserve_access_index_attribute, NULL }, -- -- /* The last attribute spec is set to be NULL. */ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ bpf_handle_preserve_access_index_attribute, NULL } -+}); - - #undef TARGET_ATTRIBUTE_TABLE - #define TARGET_ATTRIBUTE_TABLE bpf_attribute_table -diff --git a/gcc/config/csky/csky.cc b/gcc/config/csky/csky.cc -index e315e09a8..b511fafe5 100644 ---- a/gcc/config/csky/csky.cc -+++ b/gcc/config/csky/csky.cc -@@ -211,16 +211,15 @@ const int csky_dbx_regno[FIRST_PSEUDO_REGISTER] = - /* Table of machine attributes. 
*/ - static tree csky_handle_fndecl_attribute (tree *, tree, tree, int, bool *); - static tree csky_handle_isr_attribute (tree *, tree, tree, int, bool *); --static const struct attribute_spec csky_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (csky_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "naked", 0, 0, true, false, false, false, csky_handle_fndecl_attribute, NULL }, - /* Interrupt Service Routines have special prologue and epilogue requirements. */ - { "interrupt", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL }, -- { "isr", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "isr", 0, 1, false, false, false, false, csky_handle_isr_attribute, NULL } -+}); - - /* A C structure for machine-specific, per-function data. - This is added to the cfun structure. */ -diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc -index 62636b1ec..8a7c0a988 100644 ---- a/gcc/config/epiphany/epiphany.cc -+++ b/gcc/config/epiphany/epiphany.cc -@@ -460,7 +460,7 @@ epiphany_init_reg_tables (void) - They unmask them while calling an interruptible - function, though. 
*/ - --static const struct attribute_spec epiphany_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (epiphany_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -470,9 +470,8 @@ static const struct attribute_spec epiphany_attribute_table[] = - epiphany_handle_forwarder_attribute, NULL }, - { "long_call", 0, 0, false, true, true, false, NULL, NULL }, - { "short_call", 0, 0, false, true, true, false, NULL, NULL }, -- { "disinterrupt", 0, 0, false, true, true, true, NULL, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "disinterrupt", 0, 0, false, true, true, true, NULL, NULL } -+}); - - /* Handle an "interrupt" attribute; arguments as in - struct attribute_spec.handler. */ -diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc -index e2cbdd1ac..0b049abcc 100644 ---- a/gcc/config/gcn/gcn.cc -+++ b/gcc/config/gcn/gcn.cc -@@ -363,14 +363,12 @@ gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name, - - Create target-specific __attribute__ types. */ - --static const struct attribute_spec gcn_attribute_table[] = { -+TARGET_GNU_ATTRIBUTES (gcn_attribute_table, { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, - affects_type_identity } */ - {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true, -- true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL}, -- /* End element. */ -- {NULL, 0, 0, false, false, false, false, NULL, NULL} --}; -+ true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL} -+}); - - /* }}} */ - /* {{{ Registers and modes. */ -diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc -index 78cf15f15..a0fa689de 100644 ---- a/gcc/config/h8300/h8300.cc -+++ b/gcc/config/h8300/h8300.cc -@@ -4909,7 +4909,7 @@ h8300_insert_attributes (tree node, tree *attributes) - tiny_data: This variable lives in the tiny data area and can be - referenced with 16-bit absolute memory references. 
*/ - --static const struct attribute_spec h8300_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (h8300_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -4926,9 +4926,8 @@ static const struct attribute_spec h8300_attribute_table[] = - { "eightbit_data", 0, 0, true, false, false, false, - h8300_handle_eightbit_data_attribute, NULL }, - { "tiny_data", 0, 0, true, false, false, false, -- h8300_handle_tiny_data_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ h8300_handle_tiny_data_attribute, NULL } -+}); - - - /* Handle an attribute requiring a FUNCTION_DECL; arguments as in -diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc -index 86932d719..991661fe4 100644 ---- a/gcc/config/i386/i386-options.cc -+++ b/gcc/config/i386/i386-options.cc -@@ -3875,7 +3875,7 @@ handle_nodirect_extern_access_attribute (tree *pnode, tree name, - } - - /* Table of valid machine attributes. */ --const struct attribute_spec ix86_attribute_table[] = -+static const attribute_spec ix86_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -3955,10 +3955,12 @@ const struct attribute_spec ix86_attribute_table[] = - { "cf_check", 0, 0, true, false, false, false, - ix86_handle_fndecl_attribute, NULL }, - { "nodirect_extern_access", 0, 0, true, false, false, false, -- handle_nodirect_extern_access_attribute, NULL }, -+ handle_nodirect_extern_access_attribute, NULL } -+}; - -- /* End element. 
*/ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+const scoped_attribute_specs ix86_gnu_attribute_table = -+{ -+ "gnu", ix86_gnu_attributes - }; - - #include "gt-i386-options.h" -diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h -index ce4034f62..a7bdb22c0 100644 ---- a/gcc/config/i386/i386-options.h -+++ b/gcc/config/i386/i386-options.h -@@ -82,7 +82,7 @@ void ix86_function_specific_print (FILE *, int, - struct cl_target_option *); - bool ix86_valid_target_attribute_p (tree, tree, tree, int); - --extern const struct attribute_spec ix86_attribute_table[]; -+extern const struct scoped_attribute_specs ix86_gnu_attribute_table; - - - #endif /* GCC_I386_OPTIONS_H */ -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 83a0d8abb..ade965927 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -24293,6 +24293,11 @@ ix86_run_selftests (void) - - #endif /* CHECKING_P */ - -+static const scoped_attribute_specs *const ix86_attribute_table[] = -+{ -+ &ix86_gnu_attribute_table -+}; -+ - /* Initialize the GCC target structure. */ - #undef TARGET_RETURN_IN_MEMORY - #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory -diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc -index f9fb681a3..b9ced1c46 100644 ---- a/gcc/config/ia64/ia64.cc -+++ b/gcc/config/ia64/ia64.cc -@@ -357,7 +357,7 @@ static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d); - - - /* Table of valid machine attributes. 
*/ --static const struct attribute_spec ia64_attribute_table[] = -+static const attribute_spec ia64_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -369,8 +369,17 @@ static const struct attribute_spec ia64_attribute_table[] = - ia64_vms_common_object_attribute, NULL }, - #endif - { "version_id", 1, 1, true, false, false, false, -- ia64_handle_version_id_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ ia64_handle_version_id_attribute, NULL } -+}; -+ -+static const scoped_attribute_specs ia64_gnu_attribute_table = -+{ -+ "gnu", ia64_gnu_attributes -+}; -+ -+static const scoped_attribute_specs *const ia64_attribute_table[] = -+{ -+ &ia64_gnu_attribute_table - }; - - /* Initialize the GCC target structure. */ -diff --git a/gcc/config/m32c/m32c.cc b/gcc/config/m32c/m32c.cc -index 11ca9a43a..a8f6523df 100644 ---- a/gcc/config/m32c/m32c.cc -+++ b/gcc/config/m32c/m32c.cc -@@ -2996,7 +2996,7 @@ current_function_special_page_vector (rtx x) - - #undef TARGET_ATTRIBUTE_TABLE - #define TARGET_ATTRIBUTE_TABLE m32c_attribute_table --static const struct attribute_spec m32c_attribute_table[] = { -+TARGET_GNU_ATTRIBUTES (m32c_attribute_table, { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "interrupt", 0, 0, false, false, false, false, interrupt_handler, NULL }, -@@ -3004,9 +3004,8 @@ static const struct attribute_spec m32c_attribute_table[] = { - { "fast_interrupt", 0, 0, false, false, false, false, - interrupt_handler, NULL }, - { "function_vector", 1, 1, true, false, false, false, -- function_vector_handler, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ function_vector_handler, NULL } -+}); - - #undef TARGET_COMP_TYPE_ATTRIBUTES - #define TARGET_COMP_TYPE_ATTRIBUTES m32c_comp_type_attributes -diff --git a/gcc/config/m32r/m32r.cc b/gcc/config/m32r/m32r.cc -index 
bca768172..78a17f0a1 100644 ---- a/gcc/config/m32r/m32r.cc -+++ b/gcc/config/m32r/m32r.cc -@@ -111,15 +111,14 @@ static HOST_WIDE_INT m32r_starting_frame_offset (void); - - /* M32R specific attributes. */ - --static const struct attribute_spec m32r_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (m32r_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "interrupt", 0, 0, true, false, false, false, NULL, NULL }, - { "model", 1, 1, true, false, false, false, m32r_handle_model_attribute, -- NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ NULL } -+}); - - /* Initialize the GCC target structure. */ - #undef TARGET_ATTRIBUTE_TABLE -diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc -index 62898dafe..effb6db8d 100644 ---- a/gcc/config/m68k/m68k.cc -+++ b/gcc/config/m68k/m68k.cc -@@ -360,7 +360,7 @@ static void m68k_asm_final_postscan_insn (FILE *, rtx_insn *insn, rtx [], int); - #undef TARGET_ASM_FINAL_POSTSCAN_INSN - #define TARGET_ASM_FINAL_POSTSCAN_INSN m68k_asm_final_postscan_insn - --static const struct attribute_spec m68k_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (m68k_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -369,9 +369,8 @@ static const struct attribute_spec m68k_attribute_table[] = - { "interrupt_handler", 0, 0, true, false, false, false, - m68k_handle_fndecl_attribute, NULL }, - { "interrupt_thread", 0, 0, true, false, false, false, -- m68k_handle_fndecl_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ m68k_handle_fndecl_attribute, NULL } -+}); - - struct gcc_target targetm = TARGET_INITIALIZER; - -diff --git a/gcc/config/mcore/mcore.cc b/gcc/config/mcore/mcore.cc -index 28e707496..e497b0f44 100644 ---- a/gcc/config/mcore/mcore.cc -+++ b/gcc/config/mcore/mcore.cc -@@ -150,16 +150,15 @@ static bool 
mcore_modes_tieable_p (machine_mode, machine_mode); - - /* MCore specific attributes. */ - --static const struct attribute_spec mcore_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (mcore_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "dllexport", 0, 0, true, false, false, false, NULL, NULL }, - { "dllimport", 0, 0, true, false, false, false, NULL, NULL }, - { "naked", 0, 0, true, false, false, false, -- mcore_handle_naked_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ mcore_handle_naked_attribute, NULL } -+}); - - /* Initialize the GCC target structure. */ - #undef TARGET_ASM_EXTERNAL_LIBCALL -diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc -index f32effecf..6b14d3e29 100644 ---- a/gcc/config/microblaze/microblaze.cc -+++ b/gcc/config/microblaze/microblaze.cc -@@ -218,15 +218,14 @@ int break_handler; - int fast_interrupt; - int save_volatiles; - --const struct attribute_spec microblaze_attribute_table[] = { -+TARGET_GNU_ATTRIBUTES (microblaze_attribute_table, { - /* name min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude */ - {"interrupt_handler", 0, 0, true, false, false, false, NULL, NULL }, - {"break_handler", 0, 0, true, false, false, false, NULL, NULL }, - {"fast_interrupt", 0, 0, true, false, false, false, NULL, NULL }, -- {"save_volatiles", 0, 0, true, false, false, false, NULL, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ {"save_volatiles", 0, 0, true, false, false, false, NULL, NULL } -+}); - - static int microblaze_interrupt_function_p (tree); - -diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc -index 02d11ddbf..5474ca152 100644 ---- a/gcc/config/mips/mips.cc -+++ b/gcc/config/mips/mips.cc -@@ -607,7 +607,7 @@ static tree mips_handle_use_shadow_register_set_attr (tree *, tree, tree, int, - bool *); - - /* The 
value of TARGET_ATTRIBUTE_TABLE. */ --static const struct attribute_spec mips_attribute_table[] = { -+TARGET_GNU_ATTRIBUTES (mips_attribute_table, { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "long_call", 0, 0, false, true, true, false, NULL, NULL }, -@@ -629,9 +629,8 @@ static const struct attribute_spec mips_attribute_table[] = { - { "use_shadow_register_set", 0, 1, false, true, true, false, - mips_handle_use_shadow_register_set_attr, NULL }, - { "keep_interrupts_masked", 0, 0, false, true, true, false, NULL, NULL }, -- { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "use_debug_exception_return", 0, 0, false, true, true, false, NULL, NULL } -+}); - - /* A table describing all the processors GCC knows about; see - mips-cpus.def for details. */ -diff --git a/gcc/config/msp430/msp430.cc b/gcc/config/msp430/msp430.cc -index 7a378ceac..f58855978 100644 ---- a/gcc/config/msp430/msp430.cc -+++ b/gcc/config/msp430/msp430.cc -@@ -2055,7 +2055,7 @@ static const struct attribute_spec::exclusions attr_either_exclusions[] = - #define TARGET_ATTRIBUTE_TABLE msp430_attribute_table - - /* Table of MSP430-specific attributes. 
*/ --const struct attribute_spec msp430_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (msp430_attribute_table, - { - /* { name, min_num_args, max_num_args, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -2073,10 +2073,8 @@ const struct attribute_spec msp430_attribute_table[] = - { ATTR_UPPER, 0, 0, true, false, false, false, msp430_section_attr, - attr_upper_exclusions }, - { ATTR_EITHER, 0, 0, true, false, false, false, msp430_section_attr, -- attr_either_exclusions }, -- -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -- }; -+ attr_either_exclusions } -+ }); - - #undef TARGET_HANDLE_GENERIC_ATTRIBUTE - #define TARGET_HANDLE_GENERIC_ATTRIBUTE msp430_handle_generic_attribute -diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc -index 27530495f..519b11e4c 100644 ---- a/gcc/config/nds32/nds32.cc -+++ b/gcc/config/nds32/nds32.cc -@@ -288,7 +288,7 @@ static const int nds32_reg_alloc_order_for_speed[] = - }; - - /* Defining target-specific uses of __attribute__. */ --static const struct attribute_spec nds32_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (nds32_attribute_table, - { - /* Syntax: { name, min_len, max_len, decl_required, type_required, - function_type_required, affects_type_identity, handler, -@@ -326,11 +326,8 @@ static const struct attribute_spec nds32_attribute_table[] = - - /* FOR BACKWARD COMPATIBILITY, - this attribute also tells no prologue/epilogue. */ -- { "no_prologue", 0, 0, false, false, false, false, NULL, NULL }, -- -- /* The last attribute spec is set to be NULL. 
*/ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ { "no_prologue", 0, 0, false, false, false, false, NULL, NULL } -+}); - - - /* ------------------------------------------------------------------------ */ -diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc -index 7f2103ba6..9a3e418f4 100644 ---- a/gcc/config/nvptx/nvptx.cc -+++ b/gcc/config/nvptx/nvptx.cc -@@ -5817,16 +5817,15 @@ nvptx_handle_shared_attribute (tree *node, tree name, tree ARG_UNUSED (args), - } - - /* Table of valid machine attributes. */ --static const struct attribute_spec nvptx_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (nvptx_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "kernel", 0, 0, true, false, false, false, nvptx_handle_kernel_attribute, - NULL }, - { "shared", 0, 0, true, false, false, false, nvptx_handle_shared_attribute, -- NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ NULL } -+}); - - /* Limit vector alignments to BIGGEST_ALIGNMENT. */ - -diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc -index 9cf79beba..f5a27bdc9 100644 ---- a/gcc/config/riscv/riscv.cc -+++ b/gcc/config/riscv/riscv.cc -@@ -336,7 +336,7 @@ static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); - static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *); - - /* Defining target-specific uses of __attribute__. */ --static const struct attribute_spec riscv_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (riscv_attribute_table, - { - /* Syntax: { name, min_len, max_len, decl_required, type_required, - function_type_required, affects_type_identity, handler, -@@ -347,11 +347,8 @@ static const struct attribute_spec riscv_attribute_table[] = - riscv_handle_fndecl_attribute, NULL }, - /* This attribute generates prologue/epilogue for interrupt handlers. 
*/ - { "interrupt", 0, 1, false, true, true, false, -- riscv_handle_type_attribute, NULL }, -- -- /* The last attribute spec is set to be NULL. */ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ riscv_handle_type_attribute, NULL } -+}); - - /* Order for the CLOBBERs/USEs of gpr_save. */ - static const unsigned gpr_save_reg_order[] = { -diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc -index b3727c0a8..97386c7ea 100644 ---- a/gcc/config/rl78/rl78.cc -+++ b/gcc/config/rl78/rl78.cc -@@ -898,7 +898,7 @@ rl78_handle_vector_attribute (tree * node, - #define TARGET_ATTRIBUTE_TABLE rl78_attribute_table - - /* Table of RL78-specific attributes. */ --const struct attribute_spec rl78_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (rl78_attribute_table, - { - /* Name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude. */ -@@ -911,9 +911,8 @@ const struct attribute_spec rl78_attribute_table[] = - { "saddr", 0, 0, true, false, false, false, - rl78_handle_saddr_attribute, NULL }, - { "vector", 1, -1, true, false, false, false, -- rl78_handle_vector_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ rl78_handle_vector_attribute, NULL } -+}); - - - -diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc -index 55d4ce751..46e3d1a12 100644 ---- a/gcc/config/rs6000/rs6000.cc -+++ b/gcc/config/rs6000/rs6000.cc -@@ -1276,7 +1276,7 @@ static const char alt_reg_names[][8] = - - /* Table of valid machine attributes. 
*/ - --static const struct attribute_spec rs6000_attribute_table[] = -+static const attribute_spec rs6000_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -1293,7 +1293,16 @@ static const struct attribute_spec rs6000_attribute_table[] = - #ifdef SUBTARGET_ATTRIBUTE_TABLE - SUBTARGET_ATTRIBUTE_TABLE, - #endif -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+}; -+ -+static const scoped_attribute_specs rs6000_gnu_attribute_table = -+{ -+ "gnu", rs6000_gnu_attributes -+}; -+ -+static const scoped_attribute_specs *const rs6000_attribute_table[] = -+{ -+ &rs6000_gnu_attribute_table - }; - - #ifndef TARGET_PROFILE_KERNEL -diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc -index 412a3a354..2f1178b00 100644 ---- a/gcc/config/rx/rx.cc -+++ b/gcc/config/rx/rx.cc -@@ -2759,7 +2759,7 @@ rx_handle_vector_attribute (tree * node, - } - - /* Table of RX specific attributes. */ --const struct attribute_spec rx_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (rx_attribute_table, - { - /* Name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude. */ -@@ -2770,9 +2770,8 @@ const struct attribute_spec rx_attribute_table[] = - { "naked", 0, 0, true, false, false, false, - rx_handle_func_attribute, NULL }, - { "vector", 1, -1, true, false, false, false, -- rx_handle_vector_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ rx_handle_vector_attribute, NULL } -+}); - - /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. 
*/ - -diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc -index f1599a5c5..dcdf7dad0 100644 ---- a/gcc/config/s390/s390.cc -+++ b/gcc/config/s390/s390.cc -@@ -1247,7 +1247,7 @@ s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED, - return NULL_TREE; - } - --static const struct attribute_spec s390_attribute_table[] = { -+TARGET_GNU_ATTRIBUTES (s390_attribute_table, { - { "hotpatch", 2, 2, true, false, false, false, - s390_handle_hotpatch_attribute, NULL }, - { "s390_vector_bool", 0, 0, false, true, false, true, -@@ -1263,11 +1263,8 @@ static const struct attribute_spec s390_attribute_table[] = { - { "function_return_reg", 1, 1, true, false, false, false, - s390_handle_string_attribute, NULL }, - { "function_return_mem", 1, 1, true, false, false, false, -- s390_handle_string_attribute, NULL }, -- -- /* End element. */ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ s390_handle_string_attribute, NULL } -+}); - - /* Return the alignment for LABEL. We default to the -falign-labels - value except for the literal pool base label. 
*/ -diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc -index 74d61c43b..5717b7ab8 100644 ---- a/gcc/config/sh/sh.cc -+++ b/gcc/config/sh/sh.cc -@@ -328,7 +328,7 @@ static bool sh_hard_regno_mode_ok (unsigned int, machine_mode); - static bool sh_modes_tieable_p (machine_mode, machine_mode); - static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t); - --static const struct attribute_spec sh_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (sh_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -347,9 +347,8 @@ static const struct attribute_spec sh_attribute_table[] = - { "resbank", 0, 0, true, false, false, false, - sh_handle_resbank_handler_attribute, NULL }, - { "function_vector", 1, 1, true, false, false, false, -- sh2a_handle_function_vector_handler_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ sh2a_handle_function_vector_handler_attribute, NULL } -+}); - - /* Initialize the GCC target structure. */ - #undef TARGET_ATTRIBUTE_TABLE -diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc -index 27db12e6b..61bf302db 100644 ---- a/gcc/config/sparc/sparc.cc -+++ b/gcc/config/sparc/sparc.cc -@@ -719,13 +719,12 @@ static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET); - - #ifdef SUBTARGET_ATTRIBUTE_TABLE - /* Table of valid machine attributes. 
*/ --static const struct attribute_spec sparc_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (sparc_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - do_diagnostic, handler, exclude } */ -- SUBTARGET_ATTRIBUTE_TABLE, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ SUBTARGET_ATTRIBUTE_TABLE -+}); - #endif - - char sparc_hard_reg_printed[8]; -diff --git a/gcc/config/stormy16/stormy16.cc b/gcc/config/stormy16/stormy16.cc -index fabf09ab9..3adc0212a 100644 ---- a/gcc/config/stormy16/stormy16.cc -+++ b/gcc/config/stormy16/stormy16.cc -@@ -2202,7 +2202,7 @@ static tree xstormy16_handle_interrupt_attribute - static tree xstormy16_handle_below100_attribute - (tree *, tree, tree, int, bool *); - --static const struct attribute_spec xstormy16_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (xstormy16_attribute_table, - { - /* name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude. */ -@@ -2211,9 +2211,8 @@ static const struct attribute_spec xstormy16_attribute_table[] = - { "BELOW100", 0, 0, false, false, false, false, - xstormy16_handle_below100_attribute, NULL }, - { "below100", 0, 0, false, false, false, false, -- xstormy16_handle_below100_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ xstormy16_handle_below100_attribute, NULL } -+}); - - /* Handle an "interrupt" attribute; - arguments as in struct attribute_spec.handler. */ -diff --git a/gcc/config/v850/v850.cc b/gcc/config/v850/v850.cc -index c7d432990..b7bbfb810 100644 ---- a/gcc/config/v850/v850.cc -+++ b/gcc/config/v850/v850.cc -@@ -3114,7 +3114,7 @@ v850_adjust_insn_length (rtx_insn *insn, int length) - - /* V850 specific attributes. 
*/ - --static const struct attribute_spec v850_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (v850_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -3127,9 +3127,8 @@ static const struct attribute_spec v850_attribute_table[] = - { "tda", 0, 0, true, false, false, false, - v850_handle_data_area_attribute, NULL }, - { "zda", 0, 0, true, false, false, false, -- v850_handle_data_area_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ v850_handle_data_area_attribute, NULL } -+}); - - static void - v850_option_override (void) -diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc -index 35b46ced9..b572603bb 100644 ---- a/gcc/config/visium/visium.cc -+++ b/gcc/config/visium/visium.cc -@@ -145,14 +145,13 @@ static inline bool current_function_has_lr_slot (void); - - /* Supported attributes: - interrupt -- specifies this function is an interrupt handler. */ --static const struct attribute_spec visium_attribute_table[] = -+TARGET_GNU_ATTRIBUTES (visium_attribute_table, - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "interrupt", 0, 0, true, false, false, false, visium_handle_interrupt_attr, -- NULL}, -- { NULL, 0, 0, false, false, false, false, NULL, NULL }, --}; -+ NULL} -+}); - - static struct machine_function *visium_init_machine_status (void); - -diff --git a/gcc/cp/cp-objcp-common.h b/gcc/cp/cp-objcp-common.h -index 3c04e5c02..ff0d0883a 100644 ---- a/gcc/cp/cp-objcp-common.h -+++ b/gcc/cp/cp-objcp-common.h -@@ -123,13 +123,16 @@ extern tree cxx_simulate_record_decl (location_t, const char *, - #undef LANG_HOOKS_FINALIZE_EARLY_DEBUG - #define LANG_HOOKS_FINALIZE_EARLY_DEBUG c_common_finalize_early_debug - --/* Attribute hooks. 
*/ --#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE c_common_attribute_table --#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE --#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE c_common_format_attribute_table -+static const scoped_attribute_specs *const cp_objcp_attribute_table[] = -+{ -+ &std_attribute_table, -+ &cxx_gnu_attribute_table, -+ &c_common_gnu_attribute_table, -+ &c_common_format_attribute_table -+}; -+ - #undef LANG_HOOKS_ATTRIBUTE_TABLE --#define LANG_HOOKS_ATTRIBUTE_TABLE cxx_attribute_table -+#define LANG_HOOKS_ATTRIBUTE_TABLE cp_objcp_attribute_table - - #undef LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P - #define LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P cp_var_mod_type_p -diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h -index 64b3196d1..52d19faa3 100644 ---- a/gcc/cp/cp-tree.h -+++ b/gcc/cp/cp-tree.h -@@ -7897,7 +7897,8 @@ extern tree maybe_dummy_object (tree, tree *); - extern bool is_dummy_object (const_tree); - extern bool is_byte_access_type (tree); - extern bool is_byte_access_type_not_plain_char (tree); --extern const struct attribute_spec cxx_attribute_table[]; -+extern const struct scoped_attribute_specs cxx_gnu_attribute_table; -+extern const struct scoped_attribute_specs std_attribute_table; - extern tree make_ptrmem_cst (tree, tree); - extern tree cp_build_type_attribute_variant (tree, tree); - extern tree cp_build_reference_type (tree, bool); -diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc -index a7933ad2c..6cfc7a2d7 100644 ---- a/gcc/cp/tree.cc -+++ b/gcc/cp/tree.cc -@@ -5004,7 +5004,7 @@ handle_likeliness_attribute (tree *node, tree name, tree args, - } - - /* Table of valid C++ attributes. 
*/ --const struct attribute_spec cxx_attribute_table[] = -+static const attribute_spec cxx_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -5012,11 +5012,15 @@ const struct attribute_spec cxx_attribute_table[] = - handle_init_priority_attribute, NULL }, - { "abi_tag", 1, -1, false, false, false, true, - handle_abi_tag_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+}; -+ -+const scoped_attribute_specs cxx_gnu_attribute_table = -+{ -+ "gnu", cxx_gnu_attributes - }; - - /* Table of C++ standard attributes. */ --const struct attribute_spec std_attribute_table[] = -+static const attribute_spec std_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -5031,10 +5035,11 @@ const struct attribute_spec std_attribute_table[] = - { "unlikely", 0, 0, false, false, false, false, - handle_likeliness_attribute, attr_cold_hot_exclusions }, - { "noreturn", 0, 0, true, false, false, false, -- handle_noreturn_attribute, attr_noreturn_exclusions }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ handle_noreturn_attribute, attr_noreturn_exclusions } - }; - -+const scoped_attribute_specs std_attribute_table = { nullptr, std_attributes }; -+ - /* Handle an "init_priority" attribute; arguments as in - struct attribute_spec.handler. */ - static tree -@@ -5617,7 +5622,6 @@ void - init_tree (void) - { - list_hash_table = hash_table::create_ggc (61); -- register_scoped_attributes (std_attribute_table, NULL); - } - - /* Returns the kind of special function that DECL (a FUNCTION_DECL) -diff --git a/gcc/d/d-attribs.cc b/gcc/d/d-attribs.cc -index c271de0c7..e402c0c11 100644 ---- a/gcc/d/d-attribs.cc -+++ b/gcc/d/d-attribs.cc -@@ -157,7 +157,7 @@ extern const struct attribute_spec::exclusions attr_cold_hot_exclusions[] = - - /* Table of machine-independent attributes. 
- For internal use (marking of built-ins) only. */ --const attribute_spec d_langhook_common_attribute_table[] = -+static const attribute_spec d_langhook_common_attributes[] = - { - ATTR_SPEC ("noreturn", 0, 0, true, false, false, false, - handle_noreturn_attribute, attr_noreturn_exclusions), -@@ -183,11 +183,15 @@ const attribute_spec d_langhook_common_attribute_table[] = - handle_type_generic_attribute, NULL), - ATTR_SPEC ("fn spec", 1, 1, false, true, true, false, - handle_fnspec_attribute, NULL), -- ATTR_SPEC (NULL, 0, 0, false, false, false, false, NULL, NULL), -+}; -+ -+const scoped_attribute_specs d_langhook_common_attribute_table = -+{ -+ "gnu", d_langhook_common_attributes - }; - - /* Table of D language attributes exposed by `gcc.attribute' UDAs. */ --const attribute_spec d_langhook_attribute_table[] = -+static const attribute_spec d_langhook_gnu_attributes[] = - { - ATTR_SPEC ("noinline", 0, 0, true, false, false, false, - d_handle_noinline_attribute, attr_noinline_exclusions), -@@ -223,9 +227,12 @@ const attribute_spec d_langhook_attribute_table[] = - d_handle_restrict_attribute, NULL), - ATTR_SPEC ("used", 0, 0, true, false, false, false, - d_handle_used_attribute, NULL), -- ATTR_SPEC (NULL, 0, 0, false, false, false, false, NULL, NULL), - }; - -+const scoped_attribute_specs d_langhook_gnu_attribute_table = -+{ -+ "gnu", d_langhook_gnu_attributes -+}; - - /* Insert the type attribute ATTRNAME with value VALUE into TYPE. - Returns a new variant of the original type declaration. */ -@@ -270,20 +277,14 @@ uda_attribute_p (const char *name) - - /* Search both our language, and target attribute tables. - Common and format attributes are kept internal. 
*/ -- for (const attribute_spec *p = d_langhook_attribute_table; p->name; p++) -- { -- if (get_identifier (p->name) == ident) -- return true; -- } -+ for (const attribute_spec &p : d_langhook_gnu_attributes) -+ if (get_identifier (p.name) == ident) -+ return true; - -- if (targetm.attribute_table) -- { -- for (const attribute_spec *p = targetm.attribute_table; p->name; p++) -- { -- if (get_identifier (p->name) == ident) -- return true; -- } -- } -+ for (auto scoped_attributes : targetm.attribute_table) -+ for (const attribute_spec &p : scoped_attributes->attributes) -+ if (get_identifier (p.name) == ident) -+ return true; - - return false; - } -diff --git a/gcc/d/d-lang.cc b/gcc/d/d-lang.cc -index f078f24fc..da9d6d4a2 100644 ---- a/gcc/d/d-lang.cc -+++ b/gcc/d/d-lang.cc -@@ -1938,6 +1938,12 @@ d_enum_underlying_base_type (const_tree type) - return TREE_TYPE (type); - } - -+const scoped_attribute_specs *const d_langhook_attribute_table[] = -+{ -+ &d_langhook_gnu_attribute_table, -+ &d_langhook_common_attribute_table, -+}; -+ - /* Definitions for our language-specific hooks. 
*/ - - #undef LANG_HOOKS_NAME -@@ -1949,7 +1955,6 @@ d_enum_underlying_base_type (const_tree type) - #undef LANG_HOOKS_HANDLE_OPTION - #undef LANG_HOOKS_POST_OPTIONS - #undef LANG_HOOKS_PARSE_FILE --#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE - #undef LANG_HOOKS_ATTRIBUTE_TABLE - #undef LANG_HOOKS_GET_ALIAS_SET - #undef LANG_HOOKS_TYPES_COMPATIBLE_P -@@ -1981,7 +1986,6 @@ d_enum_underlying_base_type (const_tree type) - #define LANG_HOOKS_HANDLE_OPTION d_handle_option - #define LANG_HOOKS_POST_OPTIONS d_post_options - #define LANG_HOOKS_PARSE_FILE d_parse_file --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE d_langhook_common_attribute_table - #define LANG_HOOKS_ATTRIBUTE_TABLE d_langhook_attribute_table - #define LANG_HOOKS_GET_ALIAS_SET d_get_alias_set - #define LANG_HOOKS_TYPES_COMPATIBLE_P d_types_compatible_p -diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h -index aedbdd80a..d4245b63b 100644 ---- a/gcc/d/d-tree.h -+++ b/gcc/d/d-tree.h -@@ -496,8 +496,8 @@ extern tree insert_decl_attribute (tree, const char *, tree = NULL_TREE); - extern void apply_user_attributes (Dsymbol *, tree); - - /* In d-builtins.cc. */ --extern const attribute_spec d_langhook_attribute_table[]; --extern const attribute_spec d_langhook_common_attribute_table[]; -+extern const struct scoped_attribute_specs d_langhook_gnu_attribute_table; -+extern const struct scoped_attribute_specs d_langhook_common_attribute_table; - extern Type *build_frontend_type (tree); - - extern tree d_builtin_function (tree); -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 4f93facf7..95d96ce1b 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -10427,12 +10427,33 @@ Target-specific attributes may be defined for functions, data and types. - These are described using the following target hooks; they also need to - be documented in @file{extend.texi}. 
- --@deftypevr {Target Hook} {const struct attribute_spec *} TARGET_ATTRIBUTE_TABLE --If defined, this target hook points to an array of @samp{struct --attribute_spec} (defined in @file{tree-core.h}) specifying the machine --specific attributes for this target and some of the restrictions on the --entities to which these attributes are applied and the arguments they --take. -+@deftypevr {Target Hook} {array_slice} TARGET_ATTRIBUTE_TABLE -+If defined, this target hook provides an array of -+@samp{scoped_attribute_spec}s (defined in @file{attribs.h}) that specify the -+machine-specific attributes for this target. The information includes some -+of the restrictions on the entities to which these attributes are applied -+and the arguments that the attributes take. -+ -+In C and C++, these attributes are associated with two syntaxes: -+the traditional GNU @code{__attribute__} syntax and the standard -+@samp{[[]]} syntax. Attributes that support the GNU syntax must be -+placed in the @code{gnu} namespace. Such attributes can then also be -+written @samp{[[gnu::@dots{}]]}. Attributes that use only the standard -+syntax should be placed in whichever namespace the attribute specification -+requires. For example, a target might choose to support vendor-specific -+@samp{[[]]} attributes that the vendor places in their own namespace. 
-+ -+Targets that only define attributes in the @code{gnu} namespace -+can uase the following shorthand to define the table: -+ -+@smallexample -+TARGET_GNU_ATTRIBUTES (@var{cpu_attribute_table}, @{ -+ @{ "@var{attribute1}", @dots{} @}, -+ @{ "@var{attribute2}", @dots{} @}, -+ @dots{}, -+ @{ "@var{attributen}", @dots{} @}, -+@}); -+@end smallexample - @end deftypevr - - @deftypefn {Target Hook} bool TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P (const_tree @var{name}) -diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc -index 468a0b7e3..27ffc7511 100644 ---- a/gcc/fortran/f95-lang.cc -+++ b/gcc/fortran/f95-lang.cc -@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. If not see - #include "cpp.h" - #include "trans-types.h" - #include "trans-const.h" -+#include "attribs.h" - - /* Language-dependent contents of an identifier. */ - -@@ -87,7 +88,7 @@ gfc_handle_omp_declare_target_attribute (tree *, tree, tree, int, bool *) - } - - /* Table of valid Fortran attributes. */ --static const struct attribute_spec gfc_attribute_table[] = -+static const attribute_spec gfc_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -97,7 +98,16 @@ static const struct attribute_spec gfc_attribute_table[] = - gfc_handle_omp_declare_target_attribute, NULL }, - { "oacc function", 0, -1, true, false, false, false, - gfc_handle_omp_declare_target_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+}; -+ -+static const scoped_attribute_specs gfc_gnu_attribute_table = -+{ -+ "gnu", gfc_gnu_attributes -+}; -+ -+static const scoped_attribute_specs *const gfc_attribute_table[] = -+{ -+ &gfc_gnu_attribute_table - }; - - #undef LANG_HOOKS_NAME -diff --git a/gcc/jit/dummy-frontend.cc b/gcc/jit/dummy-frontend.cc -index 84ff359bf..5f9f5336c 100644 ---- a/gcc/jit/dummy-frontend.cc -+++ b/gcc/jit/dummy-frontend.cc -@@ -87,7 +87,7 @@ static const struct attribute_spec::exclusions 
attr_const_pure_exclusions[] = - }; - - /* Table of machine-independent attributes supported in libgccjit. */ --const struct attribute_spec jit_attribute_table[] = -+static const attribute_spec jit_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -128,22 +128,36 @@ const struct attribute_spec jit_attribute_table[] = - /* For internal use only. The leading '*' both prevents its usage in - source code and signals that it may be overridden by machine tables. */ - { "*tm regparm", 0, 0, false, true, true, false, -- ignore_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ ignore_attribute, NULL } -+}; -+ -+static const scoped_attribute_specs jit_gnu_attribute_table = -+{ -+ "gnu", jit_gnu_attributes - }; - - /* Give the specifications for the format attributes, used by C and all - descendants. */ - --const struct attribute_spec jit_format_attribute_table[] = -+static const attribute_spec jit_format_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ - { "format", 3, 3, false, true, true, false, - handle_format_attribute, NULL }, - { "format_arg", 1, 1, false, true, true, false, -- handle_format_arg_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ handle_format_arg_attribute, NULL } -+}; -+ -+static const scoped_attribute_specs jit_format_attribute_table = -+{ -+ "gnu", jit_format_attributes -+}; -+ -+static const scoped_attribute_specs *const jit_attribute_table[] = -+{ -+ &jit_gnu_attribute_table, -+ &jit_format_attribute_table - }; - - /* Attribute handlers. */ -@@ -722,10 +736,8 @@ jit_langhook_getdecls (void) - #define LANG_HOOKS_GETDECLS jit_langhook_getdecls - - /* Attribute hooks. 
*/ --#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE jit_attribute_table --#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE --#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE jit_format_attribute_table -+#undef LANG_HOOKS_ATTRIBUTE_TABLE -+#define LANG_HOOKS_ATTRIBUTE_TABLE jit_attribute_table - - #undef LANG_HOOKS_DEEP_UNSHARING - #define LANG_HOOKS_DEEP_UNSHARING true -diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h -index e22639517..11998e40f 100644 ---- a/gcc/langhooks-def.h -+++ b/gcc/langhooks-def.h -@@ -151,9 +151,7 @@ extern void lhd_finalize_early_debug (void); - #define LANG_HOOKS_FINALIZE_EARLY_DEBUG lhd_finalize_early_debug - - /* Attribute hooks. */ --#define LANG_HOOKS_ATTRIBUTE_TABLE NULL --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE NULL --#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE NULL -+#define LANG_HOOKS_ATTRIBUTE_TABLE {} - - /* Tree inlining hooks. */ - #define LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P \ -@@ -365,8 +363,6 @@ extern void lhd_end_section (void); - LANG_HOOKS_PRINT_ERROR_FUNCTION, \ - LANG_HOOKS_TO_TARGET_CHARSET, \ - LANG_HOOKS_ATTRIBUTE_TABLE, \ -- LANG_HOOKS_COMMON_ATTRIBUTE_TABLE, \ -- LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE, \ - LANG_HOOKS_TREE_INLINING_INITIALIZER, \ - LANG_HOOKS_TREE_DUMP_INITIALIZER, \ - LANG_HOOKS_DECLS, \ -diff --git a/gcc/langhooks.h b/gcc/langhooks.h -index 4731f089a..5954f58e8 100644 ---- a/gcc/langhooks.h -+++ b/gcc/langhooks.h -@@ -530,9 +530,7 @@ struct lang_hooks - table of attributes specific to the language, a table of - attributes common to two or more languages (to allow easy - sharing), and a table of attributes for checking formats. 
*/ -- const struct attribute_spec *attribute_table; -- const struct attribute_spec *common_attribute_table; -- const struct attribute_spec *format_attribute_table; -+ array_slice attribute_table; - - struct lang_hooks_for_tree_inlining tree_inlining; - -diff --git a/gcc/lto/lto-lang.cc b/gcc/lto/lto-lang.cc -index 8d58d924d..601e92e86 100644 ---- a/gcc/lto/lto-lang.cc -+++ b/gcc/lto/lto-lang.cc -@@ -94,7 +94,7 @@ static const struct attribute_spec::exclusions attr_const_pure_exclusions[] = - }; - - /* Table of machine-independent attributes supported in GIMPLE. */ --const struct attribute_spec lto_attribute_table[] = -+static const attribute_spec lto_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -135,14 +135,18 @@ const struct attribute_spec lto_attribute_table[] = - /* For internal use only. The leading '*' both prevents its usage in - source code and signals that it may be overridden by machine tables. */ - { "*tm regparm", 0, 0, false, true, true, false, -- ignore_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+ ignore_attribute, NULL } -+}; -+ -+static const scoped_attribute_specs lto_gnu_attribute_table = -+{ -+ "gnu", lto_gnu_attributes - }; - - /* Give the specifications for the format attributes, used by C and all - descendants. 
*/ - --const struct attribute_spec lto_format_attribute_table[] = -+static const attribute_spec lto_format_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -150,7 +154,17 @@ const struct attribute_spec lto_format_attribute_table[] = - handle_format_attribute, NULL }, - { "format_arg", 1, 1, false, true, true, false, - handle_format_arg_attribute, NULL }, -- { NULL, 0, 0, false, false, false, false, NULL, NULL } -+}; -+ -+static const scoped_attribute_specs lto_format_attribute_table = -+{ -+ "gnu", lto_format_attributes -+}; -+ -+static const scoped_attribute_specs *const lto_attribute_table[] = -+{ -+ <o_gnu_attribute_table, -+ <o_format_attribute_table - }; - - enum built_in_attribute -@@ -1453,10 +1467,8 @@ static void lto_init_ts (void) - #define LANG_HOOKS_EH_PERSONALITY lto_eh_personality - - /* Attribute hooks. */ --#undef LANG_HOOKS_COMMON_ATTRIBUTE_TABLE --#define LANG_HOOKS_COMMON_ATTRIBUTE_TABLE lto_attribute_table --#undef LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE --#define LANG_HOOKS_FORMAT_ATTRIBUTE_TABLE lto_format_attribute_table -+#undef LANG_HOOKS_ATTRIBUTE_TABLE -+#define LANG_HOOKS_ATTRIBUTE_TABLE lto_attribute_table - - #undef LANG_HOOKS_BEGIN_SECTION - #define LANG_HOOKS_BEGIN_SECTION lto_obj_begin_section -diff --git a/gcc/plugin.h b/gcc/plugin.h -index ff999c405..e29651d35 100644 ---- a/gcc/plugin.h -+++ b/gcc/plugin.h -@@ -198,8 +198,7 @@ invoke_plugin_callbacks (int event ATTRIBUTE_UNUSED, - - extern void register_attribute (const struct attribute_spec *attr); - /* The default argument for the third parameter is given in attribs.h. 
*/ --extern struct scoped_attributes* register_scoped_attributes (const struct attribute_spec *, -- const char *, -+extern struct scoped_attributes* register_scoped_attributes (const struct scoped_attribute_spec &, - bool); - - #endif /* PLUGIN_H */ -diff --git a/gcc/target-def.h b/gcc/target-def.h -index f81f8fe3b..70fb393f3 100644 ---- a/gcc/target-def.h -+++ b/gcc/target-def.h -@@ -114,6 +114,20 @@ - #define TARGET_FUNCTION_INCOMING_ARG TARGET_FUNCTION_ARG - #endif - -+/* Declare a target attribute table called NAME that only has GNU attributes. -+ There should be no null trailing element. E.g.: -+ -+ TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, -+ { -+ { "aarch64_vector_pcs", ... }, -+ ... -+ }); */ -+ -+#define TARGET_GNU_ATTRIBUTES(NAME, ...) \ -+ static const attribute_spec NAME##_2[] = __VA_ARGS__; \ -+ static const scoped_attribute_specs NAME##_1 = { "gnu", NAME##_2 }; \ -+ static const scoped_attribute_specs *const NAME[] = { &NAME##_1 } -+ - #include "target-hooks-def.h" - - #include "hooks.h" -diff --git a/gcc/target.def b/gcc/target.def -index 60096c60c..6cdc09fc2 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -2199,15 +2199,36 @@ merging.", - merge_type_attributes) - - /* Table of machine attributes and functions to handle them. -- Ignored if NULL. */ -+ Ignored if empty. */ - DEFHOOKPOD - (attribute_table, -- "If defined, this target hook points to an array of @samp{struct\n\ --attribute_spec} (defined in @file{tree-core.h}) specifying the machine\n\ --specific attributes for this target and some of the restrictions on the\n\ --entities to which these attributes are applied and the arguments they\n\ --take.", -- const struct attribute_spec *, NULL) -+ "If defined, this target hook provides an array of\n\ -+@samp{scoped_attribute_spec}s (defined in @file{attribs.h}) that specify the\n\ -+machine-specific attributes for this target. 
The information includes some\n\ -+of the restrictions on the entities to which these attributes are applied\n\ -+and the arguments that the attributes take.\n\ -+\n\ -+In C and C++, these attributes are associated with two syntaxes:\n\ -+the traditional GNU @code{__attribute__} syntax and the standard\n\ -+@samp{[[]]} syntax. Attributes that support the GNU syntax must be\n\ -+placed in the @code{gnu} namespace. Such attributes can then also be\n\ -+written @samp{[[gnu::@dots{}]]}. Attributes that use only the standard\n\ -+syntax should be placed in whichever namespace the attribute specification\n\ -+requires. For example, a target might choose to support vendor-specific\n\ -+@samp{[[]]} attributes that the vendor places in their own namespace.\n\ -+\n\ -+Targets that only define attributes in the @code{gnu} namespace\n\ -+can uase the following shorthand to define the table:\n\ -+\n\ -+@smallexample\n\ -+TARGET_GNU_ATTRIBUTES (@var{cpu_attribute_table}, @{\n\ -+ @{ \"@var{attribute1}\", @dots{} @},\n\ -+ @{ \"@var{attribute2}\", @dots{} @},\n\ -+ @dots{},\n\ -+ @{ \"@var{attributen}\", @dots{} @},\n\ -+@});\n\ -+@end smallexample", -+ array_slice, {}) - - /* Return true iff attribute NAME expects a plain identifier as its first - argument. 
*/ -diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc -index f50dbbc52..67879c2c8 100644 ---- a/gcc/tree-inline.cc -+++ b/gcc/tree-inline.cc -@@ -4105,17 +4105,16 @@ inline_forbidden_p (tree fndecl) - static bool - function_attribute_inlinable_p (const_tree fndecl) - { -- if (targetm.attribute_table) -+ for (auto scoped_attributes : targetm.attribute_table) - { - const_tree a; - - for (a = DECL_ATTRIBUTES (fndecl); a; a = TREE_CHAIN (a)) - { - const_tree name = get_attribute_name (a); -- int i; - -- for (i = 0; targetm.attribute_table[i].name != NULL; i++) -- if (is_attribute_p (targetm.attribute_table[i].name, name)) -+ for (const attribute_spec &attribute : scoped_attributes->attributes) -+ if (is_attribute_p (attribute.name, name)) - return targetm.function_attribute_inlinable_p (fndecl); - } - } --- -2.33.0 - diff --git a/0173-Backport-SME-aarch64-Fix-plugin-header-install.patch b/0173-Backport-SME-aarch64-Fix-plugin-header-install.patch deleted file mode 100644 index 4bea8a5..0000000 --- a/0173-Backport-SME-aarch64-Fix-plugin-header-install.patch +++ /dev/null @@ -1,64 +0,0 @@ -From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001 -From: Jakub Jelinek -Date: Thu, 22 Dec 2022 11:15:47 +0100 -Subject: [PATCH 074/157] [Backport][SME] aarch64: Fix plugin header install - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d - -The r13-2943-g11a113d501ff64 made aarch64.h include -aarch64-option-extensions.def, but that file isn't installed -for building plugins. - -On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote: -> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def) -> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be -> for aarch64-opt.h (included via aarch64.opt). -> -> I guess TM_H should also have aarch64-arches.def, since it's included -> for aarch64_feature. 
- -gcc/Makefile.in has -TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H) -and -OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA) -which means that adding something into TM_H when it is already in -OPTIONS_H_EXTRA is a unnecessary. -It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h) -and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def -(included by aarch64.h) aren't needed for options.h, so I think the -right patch would be following. - -2022-12-22 Jakub Jelinek - - * config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def, - add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and - aarch64-option-extensions.def. - (OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor - aarch64-tuning-flags.def. ---- - gcc/config/aarch64/t-aarch64 | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 -index ba74abc0a..6a21a248f 100644 ---- a/gcc/config/aarch64/t-aarch64 -+++ b/gcc/config/aarch64/t-aarch64 -@@ -18,11 +18,11 @@ - # along with GCC; see the file COPYING3. If not see - # . 
- --TM_H += $(srcdir)/config/aarch64/aarch64-cores.def -+TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ -+ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ -+ $(srcdir)/config/aarch64/aarch64-option-extensions.def - OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ -- $(srcdir)/config/aarch64/aarch64-arches.def \ -- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ -- $(srcdir)/config/aarch64/aarch64-tuning-flags.def -+ $(srcdir)/config/aarch64/aarch64-arches.def - - $(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true - s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \ --- -2.33.0 - diff --git a/0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch b/0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch deleted file mode 100644 index 381f4ce..0000000 --- a/0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch +++ /dev/null @@ -1,1178 +0,0 @@ -From 70b732b4518dd0e44b9e6bfaaad78492b8db8f29 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:23 +0000 -Subject: [PATCH 075/157] [Backport][SME] aarch64: Add - arm_streaming(_compatible) attributes - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2c9a54b4238308b127c3b60b01a591363131e7db - -This patch adds support for recognising the SME arm::streaming -and arm::streaming_compatible attributes. These attributes -respectively describe whether the processor is definitely in -"streaming mode" (PSTATE.SM==1), whether the processor is -definitely not in streaming mode (PSTATE.SM==0), or whether -we don't know at compile time either way. - -As far as the compiler is concerned, this effectively creates three -ISA submodes: streaming mode enables things that are not available -in non-streaming mode, non-streaming mode enables things that not -available in streaming mode, and streaming-compatible mode has to stick -to the common subset. 
This means that some instructions are conditional -on PSTATE.SM==1 and some are conditional on PSTATE.SM==0. - -I wondered about recording the streaming state in a new variable. -However, the set of available instructions is also influenced by -PSTATE.ZA (added later), so I think it makes sense to view this -as an instance of a more general mechanism. Also, keeping the -PSTATE.SM state in the same flag variable as the other ISA -features makes it possible to sum up the requirements of an -ACLE function in a single value. - -The patch therefore adds a new set of feature flags called "ISA modes". -Unlike the other two sets of flags (optional features and architecture- -level features), these ISA modes are not controlled directly by -command-line parameters or "target" attributes. - -arm::streaming and arm::streaming_compatible are function type attributes -rather than function declaration attributes. This means that we need -to find somewhere to copy the type information across to a function's -target options. The patch does this in aarch64_set_current_function. - -We also need to record which ISA mode a callee expects/requires -to be active on entry. (The same mode is then active on return.) -The patch extends the current UNSPEC_CALLEE_ABI cookie to include -this information, as well as the PCS variant that it recorded -previously. - -The attributes can also be written __arm_streaming and -__arm_streaming_compatible. This has two advantages: it triggers -an error on compilers that don't understand the attributes, and it -eases use on C, where [[...]] attributes were only added in C23. - -gcc/ - * config/aarch64/aarch64-isa-modes.def: New file. - * config/aarch64/aarch64.h: Include it in the feature enumerations. - (AARCH64_FL_SM_STATE, AARCH64_FL_ISA_MODES): New constants. - (AARCH64_FL_DEFAULT_ISA_MODE): Likewise. - (AARCH64_ISA_MODE): New macro. - (CUMULATIVE_ARGS): Add an isa_mode field. - * config/aarch64/aarch64-protos.h (aarch64_gen_callee_cookie): Declare. 
- (aarch64_tlsdesc_abi_id): Return an arm_pcs. - * config/aarch64/aarch64.cc (attr_streaming_exclusions) - (aarch64_gnu_attributes, aarch64_gnu_attribute_table) - (aarch64_arm_attributes, aarch64_arm_attribute_table): New tables. - (aarch64_attribute_table): Redefine to include the gnu and arm - attributes. - (aarch64_fntype_pstate_sm, aarch64_fntype_isa_mode): New functions. - (aarch64_fndecl_pstate_sm, aarch64_fndecl_isa_mode): Likewise. - (aarch64_gen_callee_cookie, aarch64_callee_abi): Likewise. - (aarch64_insn_callee_cookie, aarch64_insn_callee_abi): Use them. - (aarch64_function_arg, aarch64_output_mi_thunk): Likewise. - (aarch64_init_cumulative_args): Initialize the isa_mode field. - (aarch64_output_mi_thunk): Use aarch64_gen_callee_cookie to get - the ABI cookie. - (aarch64_override_options): Add the ISA mode to the feature set. - (aarch64_temporary_target::copy_from_fndecl): Likewise. - (aarch64_fndecl_options, aarch64_handle_attr_arch): Likewise. - (aarch64_set_current_function): Maintain the correct ISA mode. - (aarch64_tlsdesc_abi_id): Return an arm_pcs. - (aarch64_comp_type_attributes): Handle arm::streaming and - arm::streaming_compatible. - * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): - Define __arm_streaming and __arm_streaming_compatible. - * config/aarch64/aarch64.md (tlsdesc_small_): Use - aarch64_gen_callee_cookie to get the ABI cookie. - * config/aarch64/t-aarch64 (TM_H): Add all feature-related .def files. - -gcc/testsuite/ - * gcc.target/aarch64/sme/aarch64-sme.exp: New harness. - * gcc.target/aarch64/sme/streaming_mode_1.c: New test. - * gcc.target/aarch64/sme/streaming_mode_2.c: Likewise. - * gcc.target/aarch64/sme/keyword_macros_1.c: Likewise. - * g++.target/aarch64/sme/aarch64-sme.exp: New harness. - * g++.target/aarch64/sme/streaming_mode_1.C: New test. - * g++.target/aarch64/sme/streaming_mode_2.C: Likewise. - * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. 
- * gcc.target/aarch64/auto-init-1.c: Only expect the call insn - to contain 1 (const_int 0), not 2. ---- - gcc/config/aarch64/aarch64-c.cc | 14 ++ - gcc/config/aarch64/aarch64-isa-modes.def | 35 +++ - gcc/config/aarch64/aarch64-protos.h | 3 +- - gcc/config/aarch64/aarch64.cc | 233 +++++++++++++++--- - gcc/config/aarch64/aarch64.h | 24 +- - gcc/config/aarch64/aarch64.md | 3 +- - gcc/config/aarch64/t-aarch64 | 5 +- - .../g++.target/aarch64/sme/aarch64-sme.exp | 40 +++ - .../g++.target/aarch64/sme/keyword_macros_1.C | 4 + - .../g++.target/aarch64/sme/streaming_mode_1.C | 142 +++++++++++ - .../g++.target/aarch64/sme/streaming_mode_2.C | 25 ++ - .../gcc.target/aarch64/auto-init-1.c | 3 +- - .../gcc.target/aarch64/sme/aarch64-sme.exp | 40 +++ - .../gcc.target/aarch64/sme/keyword_macros_1.c | 4 + - .../gcc.target/aarch64/sme/streaming_mode_1.c | 130 ++++++++++ - .../gcc.target/aarch64/sme/streaming_mode_2.c | 25 ++ - 16 files changed, 685 insertions(+), 45 deletions(-) - create mode 100644 gcc/config/aarch64/aarch64-isa-modes.def - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 4085ad840..397745fbd 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -72,6 +72,20 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); 
- - builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); -+ -+ /* Define keyword attributes like __arm_streaming as macros that expand -+ to the associated [[...]] attribute. Use __extension__ in the attribute -+ for C, since the [[...]] syntax was only added in C23. */ -+#define DEFINE_ARM_KEYWORD_MACRO(NAME) \ -+ builtin_define_with_value ("__arm_" NAME, \ -+ lang_GNU_CXX () \ -+ ? "[[arm::" NAME "]]" \ -+ : "[[__extension__ arm::" NAME "]]", 0); -+ -+ DEFINE_ARM_KEYWORD_MACRO ("streaming"); -+ DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible"); -+ -+#undef DEFINE_ARM_KEYWORD_MACRO - } - - /* Undefine/redefine macros that depend on the current backend state and may -diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def -new file mode 100644 -index 000000000..5915c98a8 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-isa-modes.def -@@ -0,0 +1,35 @@ -+/* Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+/* This file defines a set of "ISA modes"; in other words, it defines -+ various bits of runtime state that control the set of available -+ instructions or that affect the semantics of instructions in some way. -+ -+ Before using #include to read this file, define a macro: -+ -+ DEF_AARCH64_ISA_MODE(NAME) -+ -+ where NAME is the name of the mode. 
*/ -+ -+/* Indicates that PSTATE.SM is known to be 1 or 0 respectively. These -+ modes are mutually exclusive. If neither mode is active then the state -+ of PSTATE.SM is not known at compile time. */ -+DEF_AARCH64_ISA_MODE(SM_ON) -+DEF_AARCH64_ISA_MODE(SM_OFF) -+ -+#undef DEF_AARCH64_ISA_MODE -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 14a568140..9b03410dc 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -772,6 +772,7 @@ bool aarch64_constant_address_p (rtx); - bool aarch64_emit_approx_div (rtx, rtx, rtx); - bool aarch64_emit_approx_sqrt (rtx, rtx, bool); - tree aarch64_vector_load_decl (tree); -+rtx aarch64_gen_callee_cookie (aarch64_feature_flags, arm_pcs); - void aarch64_expand_call (rtx, rtx, rtx, bool); - bool aarch64_expand_cpymem (rtx *); - bool aarch64_expand_setmem (rtx *); -@@ -851,7 +852,7 @@ bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); - bool aarch64_use_return_insn_p (void); - const char *aarch64_output_casesi (rtx *); - --unsigned int aarch64_tlsdesc_abi_id (); -+arm_pcs aarch64_tlsdesc_abi_id (); - enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); - enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); - enum reg_class aarch64_regno_regclass (unsigned); -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 114252a3c..904166b21 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -2985,8 +2985,18 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, - gcc_unreachable (); - } - -+/* Mutually-exclusive function type attributes for controlling PSTATE.SM. 
*/ -+static const struct attribute_spec::exclusions attr_streaming_exclusions[] = -+{ -+ /* Attribute name exclusion applies to: -+ function, type, variable */ -+ { "streaming", false, true, false }, -+ { "streaming_compatible", false, true, false }, -+ { NULL, false, false, false } -+}; -+ - /* Table of machine attributes. */ --TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, -+static const attribute_spec aarch64_gnu_attributes[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -@@ -2998,7 +3008,31 @@ TARGET_GNU_ATTRIBUTES (aarch64_attribute_table, - { "Advanced SIMD type", 1, 1, false, true, false, true, NULL, NULL }, - { "SVE type", 3, 3, false, true, false, true, NULL, NULL }, - { "SVE sizeless type", 0, 0, false, true, false, true, NULL, NULL } --}); -+}; -+ -+static const scoped_attribute_specs aarch64_gnu_attribute_table = -+{ -+ "gnu", aarch64_gnu_attributes -+}; -+ -+static const attribute_spec aarch64_arm_attributes[] = -+{ -+ { "streaming", 0, 0, false, true, true, true, -+ NULL, attr_streaming_exclusions }, -+ { "streaming_compatible", 0, 0, false, true, true, true, -+ NULL, attr_streaming_exclusions }, -+}; -+ -+static const scoped_attribute_specs aarch64_arm_attribute_table = -+{ -+ "arm", aarch64_arm_attributes -+}; -+ -+static const scoped_attribute_specs *const aarch64_attribute_table[] = -+{ -+ &aarch64_gnu_attribute_table, -+ &aarch64_arm_attribute_table -+}; - - /* An ISA extension in the co-processor and main instruction set space. */ - struct aarch64_option_extension -@@ -4301,6 +4335,48 @@ aarch64_fntype_abi (const_tree fntype) - return default_function_abi; - } - -+/* Return the state of PSTATE.SM on entry to functions of type FNTYPE. 
*/ -+ -+static aarch64_feature_flags -+aarch64_fntype_pstate_sm (const_tree fntype) -+{ -+ if (lookup_attribute ("arm", "streaming", TYPE_ATTRIBUTES (fntype))) -+ return AARCH64_FL_SM_ON; -+ -+ if (lookup_attribute ("arm", "streaming_compatible", -+ TYPE_ATTRIBUTES (fntype))) -+ return 0; -+ -+ return AARCH64_FL_SM_OFF; -+} -+ -+/* Return the ISA mode on entry to functions of type FNTYPE. */ -+ -+static aarch64_feature_flags -+aarch64_fntype_isa_mode (const_tree fntype) -+{ -+ return aarch64_fntype_pstate_sm (fntype); -+} -+ -+/* Return the state of PSTATE.SM when compiling the body of -+ function FNDECL. This might be different from the state of -+ PSTATE.SM on entry. */ -+ -+static aarch64_feature_flags -+aarch64_fndecl_pstate_sm (const_tree fndecl) -+{ -+ return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); -+} -+ -+/* Return the ISA mode that should be used to compile the body of -+ function FNDECL. */ -+ -+static aarch64_feature_flags -+aarch64_fndecl_isa_mode (const_tree fndecl) -+{ -+ return aarch64_fndecl_pstate_sm (fndecl); -+} -+ - /* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ - - static bool -@@ -4363,17 +4439,46 @@ aarch64_reg_save_mode (unsigned int regno) - gcc_unreachable (); - } - --/* Implement TARGET_INSN_CALLEE_ABI. */ -+/* Given the ISA mode on entry to a callee and the ABI of the callee, -+ return the CONST_INT that should be placed in an UNSPEC_CALLEE_ABI rtx. */ - --const predefined_function_abi & --aarch64_insn_callee_abi (const rtx_insn *insn) -+rtx -+aarch64_gen_callee_cookie (aarch64_feature_flags isa_mode, arm_pcs pcs_variant) -+{ -+ return gen_int_mode ((unsigned int) isa_mode -+ | (unsigned int) pcs_variant << AARCH64_NUM_ISA_MODES, -+ DImode); -+} -+ -+/* COOKIE is a CONST_INT from an UNSPEC_CALLEE_ABI rtx. Return the -+ callee's ABI. */ -+ -+static const predefined_function_abi & -+aarch64_callee_abi (rtx cookie) -+{ -+ return function_abis[UINTVAL (cookie) >> AARCH64_NUM_ISA_MODES]; -+} -+ -+/* INSN is a call instruction. 
Return the CONST_INT stored in its -+ UNSPEC_CALLEE_ABI rtx. */ -+ -+static rtx -+aarch64_insn_callee_cookie (const rtx_insn *insn) - { - rtx pat = PATTERN (insn); - gcc_assert (GET_CODE (pat) == PARALLEL); - rtx unspec = XVECEXP (pat, 0, 1); - gcc_assert (GET_CODE (unspec) == UNSPEC - && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); -- return function_abis[INTVAL (XVECEXP (unspec, 0, 0))]; -+ return XVECEXP (unspec, 0, 0); -+} -+ -+/* Implement TARGET_INSN_CALLEE_ABI. */ -+ -+const predefined_function_abi & -+aarch64_insn_callee_abi (const rtx_insn *insn) -+{ -+ return aarch64_callee_abi (aarch64_insn_callee_cookie (insn)); - } - - /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves -@@ -8117,7 +8222,7 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) - || pcum->pcs_variant == ARM_PCS_SVE); - - if (arg.end_marker_p ()) -- return gen_int_mode (pcum->pcs_variant, DImode); -+ return aarch64_gen_callee_cookie (pcum->isa_mode, pcum->pcs_variant); - - aarch64_layout_arg (pcum_v, arg); - return pcum->aapcs_reg; -@@ -8138,9 +8243,15 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - pcum->aapcs_nextnvrn = 0; - pcum->aapcs_nextnprn = 0; - if (fntype) -- pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); -+ { -+ pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); -+ pcum->isa_mode = aarch64_fntype_isa_mode (fntype); -+ } - else -- pcum->pcs_variant = ARM_PCS_AAPCS64; -+ { -+ pcum->pcs_variant = ARM_PCS_AAPCS64; -+ pcum->isa_mode = AARCH64_FL_DEFAULT_ISA_MODE; -+ } - pcum->aapcs_reg = NULL_RTX; - pcum->aapcs_arg_processed = false; - pcum->aapcs_stack_words = 0; -@@ -10627,7 +10738,9 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - } - funexp = XEXP (DECL_RTL (function), 0); - funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); -- rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); -+ auto isa_mode = aarch64_fntype_isa_mode (TREE_TYPE (function)); -+ auto pcs_variant = arm_pcs 
(fndecl_abi (function).id ()); -+ rtx callee_abi = aarch64_gen_callee_cookie (isa_mode, pcs_variant); - insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); - SIBLING_CALL_P (insn) = 1; - -@@ -18618,6 +18731,7 @@ aarch64_override_options (void) - SUBTARGET_OVERRIDE_OPTIONS; - #endif - -+ auto isa_mode = AARCH64_FL_DEFAULT_ISA_MODE; - if (cpu && arch) - { - /* If both -mcpu and -march are specified, warn if they are not -@@ -18630,25 +18744,25 @@ aarch64_override_options (void) - } - - selected_arch = arch->arch; -- aarch64_set_asm_isa_flags (arch_isa); -+ aarch64_set_asm_isa_flags (arch_isa | isa_mode); - } - else if (cpu) - { - selected_arch = cpu->arch; -- aarch64_set_asm_isa_flags (cpu_isa); -+ aarch64_set_asm_isa_flags (cpu_isa | isa_mode); - } - else if (arch) - { - cpu = &all_cores[arch->ident]; - selected_arch = arch->arch; -- aarch64_set_asm_isa_flags (arch_isa); -+ aarch64_set_asm_isa_flags (arch_isa | isa_mode); - } - else - { - /* No -mcpu or -march specified, so use the default CPU. */ - cpu = &all_cores[TARGET_CPU_DEFAULT]; - selected_arch = cpu->arch; -- aarch64_set_asm_isa_flags (cpu->flags); -+ aarch64_set_asm_isa_flags (cpu->flags | isa_mode); - } - - selected_tune = tune ? tune->ident : cpu->ident; -@@ -18821,6 +18935,21 @@ aarch64_save_restore_target_globals (tree new_tree) - TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); - } - -+/* Return the target_option_node for FNDECL, or the current options -+ if FNDECL is null. */ -+ -+static tree -+aarch64_fndecl_options (tree fndecl) -+{ -+ if (!fndecl) -+ return target_option_current_node; -+ -+ if (tree options = DECL_FUNCTION_SPECIFIC_TARGET (fndecl)) -+ return options; -+ -+ return target_option_default_node; -+} -+ - /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions - like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET - of the function, if such exists. 
This function may be called multiple -@@ -18830,25 +18959,24 @@ aarch64_save_restore_target_globals (tree new_tree) - static void - aarch64_set_current_function (tree fndecl) - { -- if (!fndecl || fndecl == aarch64_previous_fndecl) -- return; -- -- tree old_tree = (aarch64_previous_fndecl -- ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl) -- : NULL_TREE); -- -- tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); -+ tree old_tree = aarch64_fndecl_options (aarch64_previous_fndecl); -+ tree new_tree = aarch64_fndecl_options (fndecl); - -- /* If current function has no attributes but the previous one did, -- use the default node. */ -- if (!new_tree && old_tree) -- new_tree = target_option_default_node; -+ auto new_isa_mode = (fndecl -+ ? aarch64_fndecl_isa_mode (fndecl) -+ : AARCH64_FL_DEFAULT_ISA_MODE); -+ auto isa_flags = TREE_TARGET_OPTION (new_tree)->x_aarch64_isa_flags; - - /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to - the default have been handled by aarch64_save_restore_target_globals from - aarch64_pragma_target_parse. */ -- if (old_tree == new_tree) -- return; -+ if (old_tree == new_tree -+ && (!fndecl || aarch64_previous_fndecl) -+ && (isa_flags & AARCH64_FL_ISA_MODES) == new_isa_mode) -+ { -+ gcc_assert (AARCH64_ISA_MODE == new_isa_mode); -+ return; -+ } - - aarch64_previous_fndecl = fndecl; - -@@ -18856,7 +18984,28 @@ aarch64_set_current_function (tree fndecl) - cl_target_option_restore (&global_options, &global_options_set, - TREE_TARGET_OPTION (new_tree)); - -+ /* The ISA mode can vary based on function type attributes and -+ function declaration attributes. Make sure that the target -+ options correctly reflect these attributes. 
*/ -+ if ((isa_flags & AARCH64_FL_ISA_MODES) != new_isa_mode) -+ { -+ auto base_flags = (aarch64_asm_isa_flags & ~AARCH64_FL_ISA_MODES); -+ aarch64_set_asm_isa_flags (base_flags | new_isa_mode); -+ -+ aarch64_override_options_internal (&global_options); -+ new_tree = build_target_option_node (&global_options, -+ &global_options_set); -+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_tree; -+ -+ tree new_optimize = build_optimization_node (&global_options, -+ &global_options_set); -+ if (new_optimize != optimization_default_node) -+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; -+ } -+ - aarch64_save_restore_target_globals (new_tree); -+ -+ gcc_assert (AARCH64_ISA_MODE == new_isa_mode); - } - - /* Enum describing the various ways we can handle attributes. -@@ -18906,7 +19055,7 @@ aarch64_handle_attr_arch (const char *str) - { - gcc_assert (tmp_arch); - selected_arch = tmp_arch->arch; -- aarch64_set_asm_isa_flags (tmp_flags); -+ aarch64_set_asm_isa_flags (tmp_flags | AARCH64_ISA_MODE); - return true; - } - -@@ -18947,7 +19096,7 @@ aarch64_handle_attr_cpu (const char *str) - gcc_assert (tmp_cpu); - selected_tune = tmp_cpu->ident; - selected_arch = tmp_cpu->arch; -- aarch64_set_asm_isa_flags (tmp_flags); -+ aarch64_set_asm_isa_flags (tmp_flags | AARCH64_ISA_MODE); - return true; - } - -@@ -19047,7 +19196,7 @@ aarch64_handle_attr_isa_flags (char *str) - features if the user wants to handpick specific features. */ - if (strncmp ("+nothing", str, 8) == 0) - { -- isa_flags = 0; -+ isa_flags = AARCH64_ISA_MODE; - str += 8; - } - -@@ -19552,7 +19701,7 @@ aarch64_can_inline_p (tree caller, tree callee) - /* Return the ID of the TLDESC ABI, initializing the descriptor if hasn't - been already. 
*/ - --unsigned int -+arm_pcs - aarch64_tlsdesc_abi_id () - { - predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC]; -@@ -19566,7 +19715,7 @@ aarch64_tlsdesc_abi_id () - SET_HARD_REG_BIT (full_reg_clobbers, regno); - tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers); - } -- return tlsdesc_abi.id (); -+ return ARM_PCS_TLSDESC; - } - - /* Return true if SYMBOL_REF X binds locally. */ -@@ -27270,22 +27419,26 @@ aarch64_simd_clone_usable (struct cgraph_node *node) - static int - aarch64_comp_type_attributes (const_tree type1, const_tree type2) - { -- auto check_attr = [&](const char *name) { -- tree attr1 = lookup_attribute (name, TYPE_ATTRIBUTES (type1)); -- tree attr2 = lookup_attribute (name, TYPE_ATTRIBUTES (type2)); -+ auto check_attr = [&](const char *ns, const char *name) { -+ tree attr1 = lookup_attribute (ns, name, TYPE_ATTRIBUTES (type1)); -+ tree attr2 = lookup_attribute (ns, name, TYPE_ATTRIBUTES (type2)); - if (!attr1 && !attr2) - return true; - - return attr1 && attr2 && attribute_value_equal (attr1, attr2); - }; - -- if (!check_attr ("aarch64_vector_pcs")) -+ if (!check_attr ("gnu", "aarch64_vector_pcs")) -+ return 0; -+ if (!check_attr ("gnu", "Advanced SIMD type")) -+ return 0; -+ if (!check_attr ("gnu", "SVE type")) - return 0; -- if (!check_attr ("Advanced SIMD type")) -+ if (!check_attr ("gnu", "SVE sizeless type")) - return 0; -- if (!check_attr ("SVE type")) -+ if (!check_attr ("arm", "streaming")) - return 0; -- if (!check_attr ("SVE sizeless type")) -+ if (!check_attr ("arm", "streaming_compatible")) - return 0; - return 1; - } -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 19b82b4f3..84215c8c3 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -157,10 +157,13 @@ - - #ifndef USED_FOR_TARGET - --/* Define an enum of all features (architectures and extensions). */ -+/* Define an enum of all features (ISA modes, architectures and extensions). 
-+ The ISA modes must come first. */ - enum class aarch64_feature : unsigned char { -+#define DEF_AARCH64_ISA_MODE(IDENT) IDENT, - #define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) IDENT, - #define AARCH64_ARCH(A, B, IDENT, D, E) IDENT, -+#include "aarch64-isa-modes.def" - #include "aarch64-option-extensions.def" - #include "aarch64-arches.def" - }; -@@ -169,16 +172,34 @@ enum class aarch64_feature : unsigned char { - #define HANDLE(IDENT) \ - constexpr auto AARCH64_FL_##IDENT \ - = aarch64_feature_flags (1) << int (aarch64_feature::IDENT); -+#define DEF_AARCH64_ISA_MODE(IDENT) HANDLE (IDENT) - #define AARCH64_OPT_EXTENSION(A, IDENT, C, D, E, F) HANDLE (IDENT) - #define AARCH64_ARCH(A, B, IDENT, D, E) HANDLE (IDENT) -+#include "aarch64-isa-modes.def" - #include "aarch64-option-extensions.def" - #include "aarch64-arches.def" - #undef HANDLE - -+constexpr auto AARCH64_FL_SM_STATE = AARCH64_FL_SM_ON | AARCH64_FL_SM_OFF; -+ -+constexpr unsigned int AARCH64_NUM_ISA_MODES = (0 -+#define DEF_AARCH64_ISA_MODE(IDENT) + 1 -+#include "aarch64-isa-modes.def" -+); -+ -+/* The mask of all ISA modes. */ -+constexpr auto AARCH64_FL_ISA_MODES -+ = (aarch64_feature_flags (1) << AARCH64_NUM_ISA_MODES) - 1; -+ -+/* The default ISA mode, for functions with no attributes that specify -+ something to the contrary. */ -+constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; -+ - #endif - - /* Macros to test ISA flags. */ - -+#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) - #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) - #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) - #define AARCH64_ISA_FP (aarch64_isa_flags & AARCH64_FL_FP) -@@ -904,6 +925,7 @@ enum arm_pcs - typedef struct - { - enum arm_pcs pcs_variant; -+ aarch64_feature_flags isa_mode; - int aapcs_arg_processed; /* No need to lay out this argument again. */ - int aapcs_ncrn; /* Next Core register number. */ - int aapcs_nextncrn; /* Next next core register number. 
*/ -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index c0977a3da..29a665e45 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -7017,7 +7017,8 @@ - { - if (TARGET_SVE) - { -- rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); -+ rtx abi = aarch64_gen_callee_cookie (AARCH64_ISA_MODE, -+ aarch64_tlsdesc_abi_id ()); - rtx_insn *call - = emit_call_insn (gen_tlsdesc_small_sve_ (operands[0], abi)); - RTL_CONST_CALL_P (call) = 1; -diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 -index 6a21a248f..10cd8f093 100644 ---- a/gcc/config/aarch64/t-aarch64 -+++ b/gcc/config/aarch64/t-aarch64 -@@ -20,7 +20,10 @@ - - TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ - $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ -- $(srcdir)/config/aarch64/aarch64-option-extensions.def -+ $(srcdir)/config/aarch64/aarch64-option-extensions.def \ -+ $(srcdir)/config/aarch64/aarch64-cores.def \ -+ $(srcdir)/config/aarch64/aarch64-isa-modes.def \ -+ $(srcdir)/config/aarch64/aarch64-arches.def - OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \ - $(srcdir)/config/aarch64/aarch64-arches.def - -diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp -new file mode 100644 -index 000000000..72fcd0bd9 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp -@@ -0,0 +1,40 @@ -+# Specific regression driver for AArch64 SME. -+# Copyright (C) 2009-2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. 
-+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib g++-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+aarch64-with-arch-dg-options "" { -+ # Main loop. -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+} -+ -+# All done. -+dg-finish -diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -new file mode 100644 -index 000000000..032485adf ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -@@ -0,0 +1,4 @@ -+/* { dg-options "-std=c++11 -pedantic-errors" } */ -+ -+void f1 () __arm_streaming; -+void f2 () __arm_streaming_compatible; -diff --git a/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C -new file mode 100644 -index 000000000..c3de726e7 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_1.C -@@ -0,0 +1,142 @@ -+// { dg-options "" } -+ -+void sc_a () [[arm::streaming_compatible]]; -+void sc_a (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } -+ -+void sc_b (); -+void sc_b () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" } -+ -+void sc_c () [[arm::streaming_compatible]]; -+void sc_c () {} // Inherits attribute from declaration (confusingly). 
-+ -+void sc_d (); -+void sc_d () [[arm::streaming_compatible]] {} // { dg-error "ambiguating new declaration" } -+ -+void sc_e () [[arm::streaming_compatible]] {} -+void sc_e (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } -+ -+void sc_f () {} -+void sc_f () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" } -+ -+extern void (*sc_g) (); -+extern void (*sc_g) () [[arm::streaming_compatible]]; // { dg-error "conflicting declaration" } -+ -+extern void (*sc_h) () [[arm::streaming_compatible]]; -+extern void (*sc_h) (); // { dg-error "conflicting declaration" } -+ -+//---------------------------------------------------------------------------- -+ -+void s_a () [[arm::streaming]]; -+void s_a (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } -+ -+void s_b (); -+void s_b () [[arm::streaming]]; // { dg-error "ambiguating new declaration" } -+ -+void s_c () [[arm::streaming]]; -+void s_c () {} // Inherits attribute from declaration (confusingly). 
-+ -+void s_d (); -+void s_d () [[arm::streaming]] {} // { dg-error "ambiguating new declaration" } -+ -+void s_e () [[arm::streaming]] {} -+void s_e (); // { dg-error "ambiguating new declaration" "" { xfail *-*-* } } -+ -+void s_f () {} -+void s_f () [[arm::streaming]]; // { dg-error "ambiguating new declaration" } -+ -+extern void (*s_g) (); -+extern void (*s_g) () [[arm::streaming]]; // { dg-error "conflicting declaration" } -+ -+extern void (*s_h) () [[arm::streaming]]; -+extern void (*s_h) (); // { dg-error "conflicting declaration" } -+ -+//---------------------------------------------------------------------------- -+ -+void mixed_a () [[arm::streaming]]; -+void mixed_a () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" } -+ -+void mixed_b () [[arm::streaming_compatible]]; -+void mixed_b () [[arm::streaming]]; // { dg-error "ambiguating new declaration" } -+ -+void mixed_c () [[arm::streaming]]; -+void mixed_c () [[arm::streaming_compatible]] {} // { dg-error "ambiguating new declaration" } -+ -+void mixed_d () [[arm::streaming_compatible]]; -+void mixed_d () [[arm::streaming]] {} // { dg-error "ambiguating new declaration" } -+ -+void mixed_e () [[arm::streaming]] {} -+void mixed_e () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" } -+ -+void mixed_f () [[arm::streaming_compatible]] {} -+void mixed_f () [[arm::streaming]]; // { dg-error "ambiguating new declaration" } -+ -+extern void (*mixed_g) () [[arm::streaming_compatible]]; -+extern void (*mixed_g) () [[arm::streaming]]; // { dg-error "conflicting declaration" } -+ -+extern void (*mixed_h) () [[arm::streaming]]; -+extern void (*mixed_h) () [[arm::streaming_compatible]]; // { dg-error "conflicting declaration" } -+ -+//---------------------------------------------------------------------------- -+ -+void contradiction_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+void contradiction_2 () 
[[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+int [[arm::streaming_compatible]] int_attr; // { dg-warning "attribute ignored" } -+void [[arm::streaming_compatible]] ret_attr (); // { dg-warning "attribute ignored" } -+void *[[arm::streaming]] ptr_attr; // { dg-warning "only applies to function types" } -+ -+typedef void s_callback () [[arm::streaming]]; -+typedef void sc_callback () [[arm::streaming_compatible]]; -+ -+typedef void contradiction_callback_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+typedef void contradiction_callback_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+struct s { -+ void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+ void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+}; -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_ok_1 () __arm_streaming; -+void keyword_ok_1 () __arm_streaming; -+ -+void keyword_ok_2 () __arm_streaming; -+void keyword_ok_2 () [[arm::streaming]]; -+ -+void keyword_ok_3 () [[arm::streaming]]; -+void keyword_ok_3 () __arm_streaming; -+ -+void keyword_ok_4 () __arm_streaming [[arm::streaming]]; -+ -+void keyword_ok_5 () __arm_streaming_compatible; -+void keyword_ok_5 () [[arm::streaming_compatible]]; -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_contradiction_1 () __arm_streaming; -+void keyword_contradiction_1 (); // { dg-error 
"ambiguating new declaration" "" { xfail *-*-* } } -+ -+void keyword_contradiction_2 (); -+void keyword_contradiction_2 () __arm_streaming; // { dg-error "ambiguating new declaration" } -+ -+void keyword_contradiction_3 () __arm_streaming; -+void keyword_contradiction_3 () [[arm::streaming_compatible]]; // { dg-error "ambiguating new declaration" } -+ -+void keyword_contradiction_4 () [[arm::streaming_compatible]]; -+void keyword_contradiction_4 () __arm_streaming; // { dg-error "ambiguating new declaration" } -+ -+//---------------------------------------------------------------------------- -+ -+struct s1 -+{ -+ virtual void f () [[arm::streaming]]; -+}; -+ -+struct s2 : public s1 -+{ -+ void f () override; // { dg-error "conflicting type attributes" } -+}; -diff --git a/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C -new file mode 100644 -index 000000000..f2dd2db9b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/streaming_mode_2.C -@@ -0,0 +1,25 @@ -+// { dg-options "" } -+ -+void sc_fn () [[arm::streaming_compatible]]; -+void s_fn () [[arm::streaming]]; -+void ns_fn (); -+ -+void (*sc_fn_ptr) () [[arm::streaming_compatible]]; -+void (*s_fn_ptr) () [[arm::streaming]]; -+void (*ns_fn_ptr) (); -+ -+void -+f () -+{ -+ sc_fn_ptr = sc_fn; -+ sc_fn_ptr = s_fn; // { dg-error "invalid conversion" } -+ sc_fn_ptr = ns_fn; // { dg-error "invalid conversion" } -+ -+ s_fn_ptr = sc_fn; // { dg-error "invalid conversion" } -+ s_fn_ptr = s_fn; -+ s_fn_ptr = ns_fn; // { dg-error "invalid conversion" } -+ -+ ns_fn_ptr = sc_fn; // { dg-error "invalid conversion" } -+ ns_fn_ptr = s_fn; // { dg-error "invalid conversion" } -+ ns_fn_ptr = ns_fn; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/auto-init-1.c b/gcc/testsuite/gcc.target/aarch64/auto-init-1.c -index 0fa470880..45bb02561 100644 ---- a/gcc/testsuite/gcc.target/aarch64/auto-init-1.c -+++ b/gcc/testsuite/gcc.target/aarch64/auto-init-1.c -@@ -29,4 
+29,5 @@ void foo() - return; - } - --/* { dg-final { scan-rtl-dump-times "const_int 0" 11 "expand" } } */ -+/* Includes 1 for the call instruction and 1 for a nop. */ -+/* { dg-final { scan-rtl-dump-times "const_int 0" 10 "expand" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp -new file mode 100644 -index 000000000..c990e5924 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp -@@ -0,0 +1,40 @@ -+# Specific regression driver for AArch64 SME. -+# Copyright (C) 2009-2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+aarch64-with-arch-dg-options "" { -+ # Main loop. -+ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" -+} -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -new file mode 100644 -index 000000000..8f1b83676 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -@@ -0,0 +1,4 @@ -+/* { dg-options "-std=c90 -pedantic-errors" } */ -+ -+void f1 () __arm_streaming; -+void f2 () __arm_streaming_compatible; -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c -new file mode 100644 -index 000000000..8874b05b8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_1.c -@@ -0,0 +1,130 @@ -+// { dg-options "" } -+ -+void sc_a () [[arm::streaming_compatible]]; -+void sc_a (); // { dg-error "conflicting types" } -+ -+void sc_b (); -+void sc_b () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+void sc_c () [[arm::streaming_compatible]]; -+void sc_c () {} // Inherits attribute from declaration (confusingly). -+ -+void sc_d (); -+void sc_d () [[arm::streaming_compatible]] {} // { dg-error "conflicting types" } -+ -+void sc_e () [[arm::streaming_compatible]] {} -+void sc_e (); // { dg-error "conflicting types" } -+ -+void sc_f () {} -+void sc_f () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+extern void (*sc_g) (); -+extern void (*sc_g) () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+extern void (*sc_h) () [[arm::streaming_compatible]]; -+extern void (*sc_h) (); // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void s_a () [[arm::streaming]]; -+void s_a (); // { dg-error "conflicting types" } -+ -+void s_b (); -+void s_b () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+void s_c () [[arm::streaming]]; -+void s_c () {} // Inherits attribute from declaration (confusingly). 
-+ -+void s_d (); -+void s_d () [[arm::streaming]] {} // { dg-error "conflicting types" } -+ -+void s_e () [[arm::streaming]] {} -+void s_e (); // { dg-error "conflicting types" } -+ -+void s_f () {} -+void s_f () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+extern void (*s_g) (); -+extern void (*s_g) () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+extern void (*s_h) () [[arm::streaming]]; -+extern void (*s_h) (); // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void mixed_a () [[arm::streaming]]; -+void mixed_a () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+void mixed_b () [[arm::streaming_compatible]]; -+void mixed_b () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+void mixed_c () [[arm::streaming]]; -+void mixed_c () [[arm::streaming_compatible]] {} // { dg-error "conflicting types" } -+ -+void mixed_d () [[arm::streaming_compatible]]; -+void mixed_d () [[arm::streaming]] {} // { dg-error "conflicting types" } -+ -+void mixed_e () [[arm::streaming]] {} -+void mixed_e () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+void mixed_f () [[arm::streaming_compatible]] {} -+void mixed_f () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+extern void (*mixed_g) () [[arm::streaming_compatible]]; -+extern void (*mixed_g) () [[arm::streaming]]; // { dg-error "conflicting types" } -+ -+extern void (*mixed_h) () [[arm::streaming]]; -+extern void (*mixed_h) () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void contradiction_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+void contradiction_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+int [[arm::streaming_compatible]] int_attr; 
// { dg-warning "only applies to function types" } -+void [[arm::streaming_compatible]] ret_attr (); // { dg-warning "only applies to function types" } -+void *[[arm::streaming]] ptr_attr; // { dg-warning "only applies to function types" } -+ -+typedef void s_callback () [[arm::streaming]]; -+typedef void sc_callback () [[arm::streaming_compatible]]; -+ -+typedef void contradiction_callback_1 () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+typedef void contradiction_callback_2 () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+ -+struct s { -+ void (*contradiction_callback_ptr_1) () [[arm::streaming, arm::streaming_compatible]]; // { dg-warning "conflicts with attribute" } -+ void (*contradiction_callback_ptr_2) () [[arm::streaming_compatible, arm::streaming]]; // { dg-warning "conflicts with attribute" } -+}; -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_ok_1 () __arm_streaming; -+void keyword_ok_1 () __arm_streaming; -+ -+void keyword_ok_2 () __arm_streaming; -+void keyword_ok_2 () [[arm::streaming]]; -+ -+void keyword_ok_3 () [[arm::streaming]]; -+void keyword_ok_3 () __arm_streaming; -+ -+void keyword_ok_4 () __arm_streaming [[arm::streaming]]; -+ -+void keyword_ok_5 () __arm_streaming_compatible; -+void keyword_ok_5 () [[arm::streaming_compatible]]; -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_contradiction_1 () __arm_streaming; -+void keyword_contradiction_1 (); // { dg-error "conflicting types" } -+ -+void keyword_contradiction_2 (); -+void keyword_contradiction_2 () __arm_streaming; // { 
dg-error "conflicting types" } -+ -+void keyword_contradiction_3 () __arm_streaming; -+void keyword_contradiction_3 () [[arm::streaming_compatible]]; // { dg-error "conflicting types" } -+ -+void keyword_contradiction_4 () [[arm::streaming_compatible]]; -+void keyword_contradiction_4 () __arm_streaming; // { dg-error "conflicting types" } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c -new file mode 100644 -index 000000000..e8be0f821 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c -@@ -0,0 +1,25 @@ -+// { dg-options "" } -+ -+void sc_fn () [[arm::streaming_compatible]]; -+void s_fn () [[arm::streaming]]; -+void ns_fn (); -+ -+void (*sc_fn_ptr) () [[arm::streaming_compatible]]; -+void (*s_fn_ptr) () [[arm::streaming]]; -+void (*ns_fn_ptr) (); -+ -+void -+f () -+{ -+ sc_fn_ptr = sc_fn; -+ sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } -+ sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } -+ -+ s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } -+ s_fn_ptr = s_fn; -+ s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } -+ -+ ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } -+ ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } -+ ns_fn_ptr = ns_fn; -+} --- -2.33.0 - diff --git a/0175-Backport-SME-aarch64-Add-sme.patch b/0175-Backport-SME-aarch64-Add-sme.patch deleted file mode 100644 index fc3ef34..0000000 --- a/0175-Backport-SME-aarch64-Add-sme.patch +++ /dev/null @@ -1,330 +0,0 @@ -From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:23 +0000 -Subject: [PATCH 076/157] [Backport][SME] aarch64: Add +sme - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c - -This patch adds the +sme ISA feature and requires it to be present -when compiling arm_streaming code. 
(arm_streaming_compatible code -does not necessarily assume the presence of SME. It just has to -work when SME is present and streaming mode is enabled.) - -gcc/ - * doc/invoke.texi: Document SME. - * doc/sourcebuild.texi: Document aarch64_sve. - * config/aarch64/aarch64-option-extensions.def (sme): Define. - * config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro. - (TARGET_SME): Likewise. - * config/aarch64/aarch64.cc (aarch64_override_options_internal): - Ensure that SME is present when compiling streaming code. - -gcc/testsuite/ - * lib/target-supports.exp (check_effective_target_aarch64_sme): New - target test. - * gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled - if it isn't by default. - * g++.target/aarch64/sme/aarch64-sme.exp: Likewise. - * gcc.target/aarch64/sme/streaming_mode_3.c: New test. ---- - .../aarch64/aarch64-option-extensions.def | 2 + - gcc/config/aarch64/aarch64.cc | 33 ++++++++++ - gcc/config/aarch64/aarch64.h | 5 ++ - gcc/doc/invoke.texi | 2 + - gcc/doc/sourcebuild.texi | 2 + - .../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++- - .../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++- - .../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++ - .../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++ - gcc/testsuite/lib/target-supports.exp | 12 ++++ - 10 files changed, 157 insertions(+), 4 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c - -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index bdf4baf30..faee64a79 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "") - - AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") - -+AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") -+ - 
#undef AARCH64_OPT_EXTENSION -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 904166b21..8f8395201 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) - return true; - } - -+/* Implement TARGET_START_CALL_ARGS. */ -+ -+static void -+aarch64_start_call_args (cumulative_args_t ca_v) -+{ -+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); -+ -+ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON)) -+ { -+ error ("calling a streaming function requires the ISA extension %qs", -+ "sme"); -+ inform (input_location, "you can enable %qs using the command-line" -+ " option %<-march%>, or by using the %" -+ " attribute or pragma", "sme"); -+ } -+} -+ - /* This function is used by the call expanders of the machine description. - RESULT is the register in which the result is returned. It's NULL for - "call" and "sibcall". -@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts) - && !fixed_regs[R18_REGNUM]) - error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); - -+ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) -+ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) -+ { -+ error ("streaming functions require the ISA extension %qs", "sme"); -+ inform (input_location, "you can enable %qs using the command-line" -+ " option %<-march%>, or by using the %" -+ " attribute or pragma", "sme"); -+ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY; -+ auto new_flags = (opts->x_aarch64_asm_isa_flags -+ | feature_deps::SME ().enable); -+ aarch64_set_asm_isa_flags (opts, new_flags); -+ } -+ - initialize_aarch64_code_model (opts); - initialize_aarch64_tls_size (opts); - -@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode () - #undef TARGET_FUNCTION_VALUE_REGNO_P - #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p - -+#undef TARGET_START_CALL_ARGS -+#define 
TARGET_START_CALL_ARGS aarch64_start_call_args -+ - #undef TARGET_GIMPLE_FOLD_BUILTIN - #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 84215c8c3..dd2de4e88 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM) - #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) - #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) -+#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) - #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) - #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) - #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) -@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ - #define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) - -+/* SME instructions, enabled through +sme. Note that this does not -+ imply anything about the state of PSTATE.SM. */ -+#define TARGET_SME (AARCH64_ISA_SME) -+ - /* ARMv8.3-A features. */ - #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) - -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 53709b246..2420b05d9 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy}, - Enable the Flag Manipulation instructions Extension. - @item pauth - Enable the Pointer Authentication Extension. -+@item sme -+Enable the Scalable Matrix Extension. 
- - @end table - -diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi -index 454fae11a..80936a0eb 100644 ---- a/gcc/doc/sourcebuild.texi -+++ b/gcc/doc/sourcebuild.texi -@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian. - @item aarch64_small_fpic - Binutils installed on test system supports relocation types required by -fpic - for AArch64 small memory model. -+@item aarch64_sme -+AArch64 target that generates instructions for SME. - @item aarch64_sve_hw - AArch64 target that is able to generate and execute SVE code (regardless of - whether it does so by default). -diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp -index 72fcd0bd9..1c3e69cde 100644 ---- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp -+++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp -@@ -30,10 +30,16 @@ load_lib g++-dg.exp - # Initialize `dg'. - dg-init - --aarch64-with-arch-dg-options "" { -+if { [check_effective_target_aarch64_sme] } { -+ set sme_flags "" -+} else { -+ set sme_flags "-march=armv9-a+sme" -+} -+ -+aarch64-with-arch-dg-options $sme_flags { - # Main loop. - dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -- "" "" -+ "" $sme_flags - } - - # All done. -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp -index c990e5924..011310e80 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp -+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp -@@ -30,10 +30,16 @@ load_lib gcc-dg.exp - # Initialize `dg'. - dg-init - --aarch64-with-arch-dg-options "" { -+if { [check_effective_target_aarch64_sme] } { -+ set sme_flags "" -+} else { -+ set sme_flags "-march=armv9-a+sme" -+} -+ -+aarch64-with-arch-dg-options $sme_flags { - # Main loop. - dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -- "" "" -+ "" $sme_flags - } - - # All done. 
-diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c -new file mode 100644 -index 000000000..45ec92321 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c -@@ -0,0 +1,63 @@ -+// { dg-options "" } -+ -+#pragma GCC target "+nosme" -+ -+void sc_a () [[arm::streaming_compatible]] {} -+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+void ns_a () {} -+ -+void sc_b () [[arm::streaming_compatible]] {} -+void ns_b () {} -+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+ -+void sc_c () [[arm::streaming_compatible]] {} -+void sc_d () [[arm::streaming_compatible]] {} -+ -+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+ -+void ns_c () {} -+void ns_d () {} -+ -+void sc_e () [[arm::streaming_compatible]]; -+void s_e () [[arm::streaming]]; -+void ns_e (); -+ -+#pragma GCC target "+sme" -+ -+void sc_f () [[arm::streaming_compatible]] {} -+void s_f () [[arm::streaming]] {} -+void ns_f () {} -+ -+void sc_g () [[arm::streaming_compatible]] {} -+void ns_g () {} -+void s_g () [[arm::streaming]] {} -+ -+void sc_h () [[arm::streaming_compatible]] {} -+void sc_i () [[arm::streaming_compatible]] {} -+ -+void s_h () [[arm::streaming]] {} -+void s_i () [[arm::streaming]] {} -+ -+void ns_h () {} -+void ns_i () {} -+ -+void sc_j () [[arm::streaming_compatible]]; -+void s_j () [[arm::streaming]]; -+void ns_j (); -+ -+#pragma GCC target "+sme" -+ -+void sc_k () [[arm::streaming_compatible]] {} -+ -+#pragma GCC target "+nosme" -+#pragma GCC target "+sme" -+ -+void s_k () [[arm::streaming]] {} -+ -+#pragma GCC target "+nosme" -+#pragma GCC target "+sme" -+ -+void ns_k () {} -+ -+#pragma GCC target "+nosme" -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c -new file mode 100644 -index 000000000..50e92f2e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c -@@ -0,0 +1,22 @@ -+// { dg-options "-mgeneral-regs-only" } -+ -+void sc_a () [[arm::streaming_compatible]] {} -+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+void ns_a () {} -+ -+void sc_b () [[arm::streaming_compatible]] {} -+void ns_b () {} -+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+ -+void sc_c () [[arm::streaming_compatible]] {} -+void sc_d () [[arm::streaming_compatible]] {} -+ -+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" } -+ -+void ns_c () {} -+void ns_d () {} -+ -+void sc_e () [[arm::streaming_compatible]]; -+void s_e () [[arm::streaming]]; -+void ns_e (); -diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp -index bd89d4f52..e2a9ef5fa 100644 ---- a/gcc/testsuite/lib/target-supports.exp -+++ b/gcc/testsuite/lib/target-supports.exp -@@ -3887,6 +3887,18 @@ proc aarch64_sve_bits { } { - }] - } - -+# Return 1 if this is an AArch64 target that generates instructions for SME. 
-+proc check_effective_target_aarch64_sme { } { -+ if { ![istarget aarch64*-*-*] } { -+ return 0 -+ } -+ return [check_no_compiler_messages aarch64_sme assembly { -+ #if !defined (__ARM_FEATURE_SME) -+ #error FOO -+ #endif -+ }] -+} -+ - # Return 1 if this is a compiler supporting ARC atomic operations - proc check_effective_target_arc_atomic { } { - return [check_no_compiler_messages arc_atomic assembly { --- -2.33.0 - diff --git a/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch deleted file mode 100644 index 3bd87e9..0000000 --- a/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch +++ /dev/null @@ -1,168 +0,0 @@ -From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001 -From: Kyrylo Tkachov -Date: Thu, 1 Jun 2023 09:37:06 +0100 -Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r - alternatives to 64-bit vector move patterns - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b - -We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives -to the mov patterns. This straightforward patch does that and for the pair variants too. -For the testcase in the code we now generate the optimal assembly without any superfluous -GP<->SIMD moves. - -Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf. - -gcc/ChangeLog: - - * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): - Add =r,m and =r,m alternatives. - (load_pair): Likewise. - (vec_store_pair): Likewise. - -gcc/testsuite/ChangeLog: - - * gcc.target/aarch64/xreg-vec-modes_1.c: New test. 
---- - gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++-------- - .../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++ - 2 files changed, 65 insertions(+), 17 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c - -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index 2d688edf5..b5c52ba16 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -116,26 +116,28 @@ - - (define_insn "*aarch64_simd_mov" - [(set (match_operand:VDMOV 0 "nonimmediate_operand" -- "=w, m, m, w, ?r, ?w, ?r, w, w") -+ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") - (match_operand:VDMOV 1 "general_operand" -- "m, Dz, w, w, w, r, r, Dn, Dz"))] -+ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))] - "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" - "@ - ldr\t%d0, %1 -+ ldr\t%x0, %1 - str\txzr, %0 - str\t%d1, %0 -+ str\t%x1, %0 - * return TARGET_SIMD ? \"mov\t%0., %1.\" : \"fmov\t%d0, %d1\"; - * return TARGET_SIMD ? 
\"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\"; - fmov\t%d0, %1 - mov\t%0, %1 - * return aarch64_output_simd_mov_immediate (operands[1], 64); - fmov\t%d0, xzr" -- [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\ -- neon_logic, neon_to_gp, f_mcr,\ -+ [(set_attr "type" "neon_load1_1reg, load_8, store_8, neon_store1_1reg,\ -+ store_8, neon_logic, neon_to_gp, f_mcr,\ - mov_reg, neon_move, f_mcr") -- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")] -+ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")] - ) - - (define_insn "*aarch64_simd_mov" -@@ -177,31 +179,35 @@ - ) - - (define_insn "load_pair" -- [(set (match_operand:DREG 0 "register_operand" "=w") -- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) -- (set (match_operand:DREG2 2 "register_operand" "=w") -- (match_operand:DREG2 3 "memory_operand" "m"))] -+ [(set (match_operand:DREG 0 "register_operand" "=w,r") -+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump")) -+ (set (match_operand:DREG2 2 "register_operand" "=w,r") -+ (match_operand:DREG2 3 "memory_operand" "m,m"))] - "TARGET_FLOAT - && rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (mode)))" -- "ldp\\t%d0, %d2, %z1" -- [(set_attr "type" "neon_ldp")] -+ "@ -+ ldp\t%d0, %d2, %z1 -+ ldp\t%x0, %x2, %z1" -+ [(set_attr "type" "neon_ldp,load_16")] - ) - - (define_insn "vec_store_pair" -- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") -- (match_operand:DREG 1 "register_operand" "w")) -- (set (match_operand:DREG2 2 "memory_operand" "=m") -- (match_operand:DREG2 3 "register_operand" "w"))] -+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump") -+ (match_operand:DREG 1 "register_operand" "w,r")) -+ (set (match_operand:DREG2 2 "memory_operand" "=m,m") -+ (match_operand:DREG2 3 "register_operand" "w,r"))] - "TARGET_FLOAT - && rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (mode)))" -- "stp\\t%d1, %d3, %z0" -- [(set_attr 
"type" "neon_stp")] -+ "@ -+ stp\t%d1, %d3, %z0 -+ stp\t%x1, %x3, %z0" -+ [(set_attr "type" "neon_stp,store_16")] - ) - - (define_insn "load_pair" -diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c -new file mode 100644 -index 000000000..fc4dcb1ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+typedef unsigned int v2si __attribute__((vector_size (8))); -+ -+#define force_gp(V1) asm volatile ("" \ -+ : "=r"(V1) \ -+ : "r"(V1) \ -+ : /* No clobbers */); -+ -+/* -+** foo: -+** ldr (x[0-9]+), \[x1\] -+** str \1, \[x0\] -+** ret -+*/ -+ -+void -+foo (v2si *a, v2si *b) -+{ -+ v2si tmp = *b; -+ force_gp (tmp); -+ *a = tmp; -+} -+ -+/* -+** foo2: -+** ldp (x[0-9]+), (x[0-9]+), \[x0\] -+** stp \1, \2, \[x1\] -+** ret -+*/ -+void -+foo2 (v2si *a, v2si *b) -+{ -+ v2si t1 = *a; -+ v2si t2 = a[1]; -+ force_gp (t1); -+ force_gp (t2); -+ *b = t1; -+ b[1] = t2; -+} --- -2.33.0 - diff --git a/0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch b/0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch deleted file mode 100644 index 1e89f07..0000000 --- a/0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Wed, 18 Oct 2023 09:34:01 +0100 -Subject: [PATCH 078/157] [Backport][SME] AArch64: Rewrite simd move immediate - patterns to new syntax - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa - -This rewrites the simd MOV patterns to use the new compact syntax. -No change in semantics is expected. This will be needed in follow on patches. - -This also merges the splits into the define_insn which will also be needed soon. 
- -gcc/ChangeLog: - - PR tree-optimization/109154 - * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): - Rewrite to new syntax. - (*aarch64_simd_mov" -- [(set (match_operand:VDMOV 0 "nonimmediate_operand" -- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w") -- (match_operand:VDMOV 1 "general_operand" -- "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))] -+ [(set (match_operand:VDMOV 0 "nonimmediate_operand") -+ (match_operand:VDMOV 1 "general_operand"))] - "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" -- "@ -- ldr\t%d0, %1 -- ldr\t%x0, %1 -- str\txzr, %0 -- str\t%d1, %0 -- str\t%x1, %0 -- * return TARGET_SIMD ? \"mov\t%0., %1.\" : \"fmov\t%d0, %d1\"; -- * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\"; -- fmov\t%d0, %1 -- mov\t%0, %1 -- * return aarch64_output_simd_mov_immediate (operands[1], 64); -- fmov\t%d0, xzr" -- [(set_attr "type" "neon_load1_1reg, load_8, store_8, neon_store1_1reg,\ -- store_8, neon_logic, neon_to_gp, f_mcr,\ -- mov_reg, neon_move, f_mcr") -- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")] --) -- --(define_insn "*aarch64_simd_mov" -- [(set (match_operand:VQMOV 0 "nonimmediate_operand" -- "=w, Umn, m, w, ?r, ?w, ?r, w, w") -- (match_operand:VQMOV 1 "general_operand" -- "m, Dz, w, w, w, r, r, Dn, Dz"))] -+ {@ [cons: =0, 1; attrs: type, arch] -+ [w , m ; neon_load1_1reg , * ] ldr\t%d0, %1 -+ [r , m ; load_8 , * ] ldr\t%x0, %1 -+ [m , Dz; store_8 , * ] str\txzr, %0 -+ [m , w ; neon_store1_1reg, * ] str\t%d1, %0 -+ [m , r ; store_8 , * ] str\t%x1, %0 -+ [w , w ; neon_logic , simd] mov\t%0., %1. 
-+ [w , w ; neon_logic , * ] fmov\t%d0, %d1 -+ [?r, w ; neon_to_gp , simd] umov\t%0, %1.d[0] -+ [?r, w ; neon_to_gp , * ] fmov\t%x0, %d1 -+ [?w, r ; f_mcr , * ] fmov\t%d0, %1 -+ [?r, r ; mov_reg , * ] mov\t%0, %1 -+ [w , Dn; neon_move , simd] << aarch64_output_simd_mov_immediate (operands[1], 64); -+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr -+ } -+) -+ -+(define_insn_and_split "*aarch64_simd_mov" -+ [(set (match_operand:VQMOV 0 "nonimmediate_operand") -+ (match_operand:VQMOV 1 "general_operand"))] - "TARGET_FLOAT - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" -- "@ -- ldr\t%q0, %1 -- stp\txzr, xzr, %0 -- str\t%q1, %0 -- mov\t%0., %1. -- # -- # -- # -- * return aarch64_output_simd_mov_immediate (operands[1], 128); -- fmov\t%d0, xzr" -- [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\ -- neon_logic, multiple, multiple,\ -- multiple, neon_move, fmov") -- (set_attr "length" "4,4,4,4,8,8,8,4,4") -- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")] -+ {@ [cons: =0, 1; attrs: type, arch, length] -+ [w , m ; neon_load1_1reg , * , 4] ldr\t%q0, %1 -+ [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0 -+ [m , w ; neon_store1_1reg, * , 4] str\t%q1, %0 -+ [w , w ; neon_logic , simd, 4] mov\t%0., %1. 
-+ [?r , w ; multiple , * , 8] # -+ [?w , r ; multiple , * , 8] # -+ [?r , r ; multiple , * , 8] # -+ [w , Dn; neon_move , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128); -+ [w , Dz; fmov , * , 4] fmov\t%d0, xzr -+ } -+ "&& reload_completed -+ && (REG_P (operands[0]) -+ && REG_P (operands[1]) -+ && !(FP_REGNUM_P (REGNO (operands[0])) -+ && FP_REGNUM_P (REGNO (operands[1]))))" -+ [(const_int 0)] -+ { -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ aarch64_simd_emit_reg_reg_move (operands, DImode, 2); -+ else -+ aarch64_split_simd_move (operands[0], operands[1]); -+ DONE; -+ } - ) - - ;; When storing lane zero we can use the normal STR and its more permissive -@@ -238,33 +243,6 @@ - [(set_attr "type" "neon_stp_q")] - ) - -- --(define_split -- [(set (match_operand:VQMOV 0 "register_operand" "") -- (match_operand:VQMOV 1 "register_operand" ""))] -- "TARGET_FLOAT -- && reload_completed -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] --{ -- aarch64_simd_emit_reg_reg_move (operands, DImode, 2); -- DONE; --}) -- --(define_split -- [(set (match_operand:VQMOV 0 "register_operand" "") -- (match_operand:VQMOV 1 "register_operand" ""))] -- "TARGET_FLOAT -- && reload_completed -- && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) -- || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" -- [(const_int 0)] --{ -- aarch64_split_simd_move (operands[0], operands[1]); -- DONE; --}) -- - (define_expand "@aarch64_split_simd_mov" - [(set (match_operand:VQMOV 0) - (match_operand:VQMOV 1))] --- -2.33.0 - diff --git a/0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch b/0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch deleted file mode 100644 index 95f88fc..0000000 --- a/0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 
883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Tue, 20 Jun 2023 08:54:42 +0100 -Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from - *mov_aarch64 - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a - -I accidentally left a test comment in the final version of the patch. -This removes the comment. - -gcc/ChangeLog: - - * config/aarch64/aarch64.md (*mov_aarch64): Drop test comment. ---- - gcc/config/aarch64/aarch64.md | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 29a665e45..1ec23fae8 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -1213,7 +1213,7 @@ - [m, r Z ; store_4 , * ] str\\t%w1, %0 - [m, w ; store_4 , * ] str\t%1, %0 - [r, w ; neon_to_gp , simd ] umov\t%w0, %1.[0] -- [r, w ; neon_to_gp , nosimd] fmov\t%w0, %s1 /*foo */ -+ [r, w ; neon_to_gp , nosimd] fmov\t%w0, %s1 - [w, r Z ; neon_from_gp, simd ] dup\t%0., %w1 - [w, r Z ; neon_from_gp, nosimd] fmov\t%s0, %w1 - [w, w ; neon_dup , simd ] dup\t%0, %1.[0] --- -2.33.0 - diff --git a/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch b/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch deleted file mode 100644 index f99b246..0000000 --- a/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch +++ /dev/null @@ -1,1552 +0,0 @@ -From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:24 +0000 -Subject: [PATCH 080/157] [Backport][SME] aarch64: Distinguish - streaming-compatible AdvSIMD insns - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac - -The vast majority of Advanced SIMD instructions are not -available in streaming mode, but some of the load/store/move -instructions are. 
This patch adds a new target feature macro -called TARGET_BASE_SIMD for this streaming-compatible subset. - -The vector-to-vector move instructions are not streaming-compatible, -so we need to use the SVE move instructions where enabled, or fall -back to the nofp16 handling otherwise. - -I haven't found a good way of testing the SVE EXT alternative -in aarch64_simd_mov_from_high, but I'd rather provide it -than not. - -gcc/ - * config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro. - (TARGET_SIMD): Require PSTATE.SM to be 0. - (AARCH64_ISA_SM_OFF): New macro. - * config/aarch64/aarch64.cc (aarch64_array_mode_supported_p): - Allow Advanced SIMD structure modes for TARGET_BASE_SIMD. - (aarch64_print_operand): Support '%Z'. - (aarch64_secondary_reload): Expect SVE moves to be used for - Advanced SIMD modes if SVE is enabled and non-streaming - Advanced SIMD isn't. - (aarch64_register_move_cost): Likewise. - (aarch64_simd_container_mode): Extend Advanced SIMD mode - handling to TARGET_BASE_SIMD. - (aarch64_expand_cpymem): Expand commentary. - * config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd. - (arch_enabled): Handle it. - (*mov_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD. - (*movti_aarch64): Use an SVE move instruction if non-streaming - SIMD isn't available. - (*mov_aarch64): Likewise. - (load_pair_dw_tftf): Extend to TARGET_BASE_SIMD. - (store_pair_dw_tftf): Likewise. - (loadwb_pair_): Likewise. - (storewb_pair_): Likewise. - * config/aarch64/aarch64-simd.md (*aarch64_simd_mov): - Allow UMOV in streaming mode. - (*aarch64_simd_mov): Use an SVE move instruction - if non-streaming SIMD isn't available. - (aarch64_store_lane0): Depend on TARGET_FLOAT rather than - TARGET_SIMD. - (aarch64_simd_mov_from_low): Likewise. Use fmov if - Advanced SIMD is completely disabled. - (aarch64_simd_mov_from_high): Use SVE EXT instructions if - non-streaming SIMD isn't available. - -gcc/testsuite/ - * gcc.target/aarch64/movdf_2.c: New test. 
- * gcc.target/aarch64/movdi_3.c: Likewise. - * gcc.target/aarch64/movhf_2.c: Likewise. - * gcc.target/aarch64/movhi_2.c: Likewise. - * gcc.target/aarch64/movqi_2.c: Likewise. - * gcc.target/aarch64/movsf_2.c: Likewise. - * gcc.target/aarch64/movsi_2.c: Likewise. - * gcc.target/aarch64/movtf_3.c: Likewise. - * gcc.target/aarch64/movtf_4.c: Likewise. - * gcc.target/aarch64/movti_3.c: Likewise. - * gcc.target/aarch64/movti_4.c: Likewise. - * gcc.target/aarch64/movv16qi_4.c: Likewise. - * gcc.target/aarch64/movv16qi_5.c: Likewise. - * gcc.target/aarch64/movv8qi_4.c: Likewise. - * gcc.target/aarch64/sme/arm_neon_1.c: Likewise. - * gcc.target/aarch64/sme/arm_neon_2.c: Likewise. - * gcc.target/aarch64/sme/arm_neon_3.c: Likewise. ---- - gcc/config/aarch64/aarch64-simd.md | 50 ++++++----- - gcc/config/aarch64/aarch64.cc | 16 ++-- - gcc/config/aarch64/aarch64.h | 12 ++- - gcc/config/aarch64/aarch64.md | 77 +++++++++-------- - gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++ - gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++ - gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++ - gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++ - gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++ - gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++ - .../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++ - .../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++ - .../gcc.target/aarch64/sme/arm_neon_3.c 
| 11 +++ - 21 files changed, 1062 insertions(+), 65 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c - -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index 1f4b30642..62493cdfa 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -121,19 +121,19 @@ - && (register_operand (operands[0], mode) - || aarch64_simd_reg_or_zero (operands[1], mode))" - {@ [cons: =0, 1; attrs: type, arch] -- [w , m ; neon_load1_1reg , * ] ldr\t%d0, %1 -- [r , m ; load_8 , * ] ldr\t%x0, %1 -- [m , Dz; store_8 , * ] str\txzr, %0 -- [m , w ; neon_store1_1reg, * ] str\t%d1, %0 -- [m , r ; store_8 , * ] str\t%x1, %0 -- [w , w ; neon_logic , simd] mov\t%0., %1. 
-- [w , w ; neon_logic , * ] fmov\t%d0, %d1 -- [?r, w ; neon_to_gp , simd] umov\t%0, %1.d[0] -- [?r, w ; neon_to_gp , * ] fmov\t%x0, %d1 -- [?w, r ; f_mcr , * ] fmov\t%d0, %1 -- [?r, r ; mov_reg , * ] mov\t%0, %1 -- [w , Dn; neon_move , simd] << aarch64_output_simd_mov_immediate (operands[1], 64); -- [w , Dz; f_mcr , * ] fmov\t%d0, xzr -+ [w , m ; neon_load1_1reg , * ] ldr\t%d0, %1 -+ [r , m ; load_8 , * ] ldr\t%x0, %1 -+ [m , Dz; store_8 , * ] str\txzr, %0 -+ [m , w ; neon_store1_1reg, * ] str\t%d1, %0 -+ [m , r ; store_8 , * ] str\t%x1, %0 -+ [w , w ; neon_logic , simd ] mov\t%0., %1. -+ [w , w ; neon_logic , * ] fmov\t%d0, %d1 -+ [?r, w ; neon_to_gp , base_simd] umov\t%0, %1.d[0] -+ [?r, w ; neon_to_gp , * ] fmov\t%x0, %d1 -+ [?w, r ; f_mcr , * ] fmov\t%d0, %1 -+ [?r, r ; mov_reg , * ] mov\t%0, %1 -+ [w , Dn; neon_move , simd ] << aarch64_output_simd_mov_immediate (operands[1], 64); -+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr - } - ) - -@@ -148,6 +148,7 @@ - [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0 - [m , w ; neon_store1_1reg, * , 4] str\t%q1, %0 - [w , w ; neon_logic , simd, 4] mov\t%0., %1. 
-+ [w , w ; * , sve , 4] mov\t%Z0.d, %Z1.d - [?r , w ; multiple , * , 8] # - [?w , r ; multiple , * , 8] # - [?r , r ; multiple , * , 8] # -@@ -177,7 +178,7 @@ - [(set (match_operand: 0 "memory_operand" "=m") - (vec_select: (match_operand:VALL_F16 1 "register_operand" "w") - (parallel [(match_operand 2 "const_int_operand" "n")])))] -- "TARGET_SIMD -+ "TARGET_FLOAT - && ENDIAN_LANE_N (, INTVAL (operands[2])) == 0" - "str\\t%1, %0" - [(set_attr "type" "neon_store1_1reg")] -@@ -312,35 +313,38 @@ - ) - - (define_insn_and_split "aarch64_simd_mov_from_low" -- [(set (match_operand: 0 "register_operand" "=w,?r") -+ [(set (match_operand: 0 "register_operand" "=w,?r,?r") - (vec_select: -- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w") -+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") - (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))] -- "TARGET_SIMD" -+ "TARGET_FLOAT" - "@ - # -- umov\t%0, %1.d[0]" -+ umov\t%0, %1.d[0] -+ fmov\t%0, %d1" - "&& reload_completed && aarch64_simd_register (operands[0], mode)" - [(set (match_dup 0) (match_dup 1))] - { - operands[1] = aarch64_replace_reg_mode (operands[1], mode); - } -- [(set_attr "type" "mov_reg,neon_to_gp") -+ [(set_attr "type" "mov_reg,neon_to_gp,f_mrc") -+ (set_attr "arch" "simd,base_simd,*") - (set_attr "length" "4")] - ) - - (define_insn "aarch64_simd_mov_from_high" -- [(set (match_operand: 0 "register_operand" "=w,?r,?r") -+ [(set (match_operand: 0 "register_operand" "=w,w,?r,?r") - (vec_select: -- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w") -+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w,w") - (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))] - "TARGET_FLOAT" - "@ - dup\t%d0, %1.d[1] -+ ext\t%Z0.b, %Z0.b, %Z0.b, #8 - umov\t%0, %1.d[1] - fmov\t%0, %1.d[1]" -- [(set_attr "type" "neon_dup,neon_to_gp,f_mrc") -- (set_attr "arch" "simd,simd,*") -+ [(set_attr "type" "neon_dup,*,neon_to_gp,f_mrc") -+ (set_attr "arch" "simd,sve,simd,*") - (set_attr "length" "4")] - ) - -diff 
--git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 8f8395201..08a98f8ba 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -3999,7 +3999,7 @@ static bool - aarch64_array_mode_supported_p (machine_mode mode, - unsigned HOST_WIDE_INT nelems) - { -- if (TARGET_SIMD -+ if (TARGET_BASE_SIMD - && (AARCH64_VALID_SIMD_QREG_MODE (mode) - || AARCH64_VALID_SIMD_DREG_MODE (mode)) - && (nelems >= 2 && nelems <= 4)) -@@ -12955,8 +12955,8 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - return NO_REGS; - } - -- /* Without the TARGET_SIMD instructions we cannot move a Q register -- to a Q register directly. We need a scratch. */ -+ /* Without the TARGET_SIMD or TARGET_SVE instructions we cannot move a -+ Q register to a Q register directly. We need a scratch. */ - if (REG_P (x) - && (mode == TFmode - || mode == TImode -@@ -15540,7 +15540,7 @@ aarch64_register_move_cost (machine_mode mode, - secondary reload. A general register is used as a scratch to move - the upper DI value and the lower DI value is moved directly, - hence the cost is the sum of three moves. */ -- if (! TARGET_SIMD) -+ if (!TARGET_SIMD && !TARGET_SVE) - return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP; - - return regmove_cost->FP2FP; -@@ -21107,7 +21107,7 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) - return aarch64_full_sve_mode (mode).else_mode (word_mode); - - gcc_assert (known_eq (width, 64) || known_eq (width, 128)); -- if (TARGET_SIMD) -+ if (TARGET_BASE_SIMD) - { - if (known_eq (width, 128)) - return aarch64_vq_mode (mode).else_mode (word_mode); -@@ -25221,7 +25221,11 @@ aarch64_expand_cpymem (rtx *operands) - int copy_bits = 256; - - /* Default to 256-bit LDP/STP on large copies, however small copies, no SIMD -- support or slow 256-bit LDP/STP fall back to 128-bit chunks. */ -+ support or slow 256-bit LDP/STP fall back to 128-bit chunks. -+ -+ ??? 
Although it would be possible to use LDP/STP Qn in streaming mode -+ (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear -+ whether that would improve performance. */ - if (size <= 24 - || !TARGET_SIMD - || (aarch64_tune_params.extra_tuning_flags -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index dd2de4e88..a3c83a3b1 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -61,8 +61,15 @@ - #define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN) - - /* AdvSIMD is supported in the default configuration, unless disabled by -- -mgeneral-regs-only or by the +nosimd extension. */ --#define TARGET_SIMD (AARCH64_ISA_SIMD) -+ -mgeneral-regs-only or by the +nosimd extension. The set of available -+ instructions is then subdivided into: -+ -+ - the "base" set, available both in SME streaming mode and in -+ non-streaming mode -+ -+ - the full set, available only in non-streaming mode. */ -+#define TARGET_BASE_SIMD (AARCH64_ISA_SIMD) -+#define TARGET_SIMD (AARCH64_ISA_SIMD && AARCH64_ISA_SM_OFF) - #define TARGET_FLOAT (AARCH64_ISA_FP) - - #define UNITS_PER_WORD 8 -@@ -199,6 +206,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - - /* Macros to test ISA flags. */ - -+#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) - #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) - #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) - #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 1ec23fae8..079c8a3f9 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -378,7 +378,8 @@ - ;; As a convenience, "fp_q" means "fp" + the ability to move between - ;; Q registers and is equivalent to "simd". 
- --(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16]) -+(define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd -+ simd nosimd sve fp16]) - - (define_enum_attr "arch" "arches" (const_string "any")) - -@@ -406,6 +407,12 @@ - (and (eq_attr "arch" "fp") - (match_test "TARGET_FLOAT")) - -+ (and (eq_attr "arch" "base_simd") -+ (match_test "TARGET_BASE_SIMD")) -+ -+ (and (eq_attr "arch" "nobase_simd") -+ (match_test "!TARGET_BASE_SIMD")) -+ - (and (eq_attr "arch" "fp_q, simd") - (match_test "TARGET_SIMD")) - -@@ -1202,22 +1209,22 @@ - "(register_operand (operands[0], mode) - || aarch64_reg_or_zero (operands[1], mode))" - {@ [cons: =0, 1; attrs: type, arch] -- [r, r ; mov_reg , * ] mov\t%w0, %w1 -- [r, M ; mov_imm , * ] mov\t%w0, %1 -- [w, D; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], mode); -+ [r, r ; mov_reg , * ] mov\t%w0, %w1 -+ [r, M ; mov_imm , * ] mov\t%w0, %1 -+ [w, D; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], mode); - /* The "mov_imm" type for CNT is just a placeholder. 
*/ -- [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -- [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]); -- [r, m ; load_4 , * ] ldr\t%w0, %1 -- [w, m ; load_4 , * ] ldr\t%0, %1 -- [m, r Z ; store_4 , * ] str\\t%w1, %0 -- [m, w ; store_4 , * ] str\t%1, %0 -- [r, w ; neon_to_gp , simd ] umov\t%w0, %1.[0] -- [r, w ; neon_to_gp , nosimd] fmov\t%w0, %s1 -- [w, r Z ; neon_from_gp, simd ] dup\t%0., %w1 -- [w, r Z ; neon_from_gp, nosimd] fmov\t%s0, %w1 -- [w, w ; neon_dup , simd ] dup\t%0, %1.[0] -- [w, w ; neon_dup , nosimd] fmov\t%s0, %s1 -+ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); -+ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]); -+ [r, m ; load_4 , * ] ldr\t%w0, %1 -+ [w, m ; load_4 , * ] ldr\t%0, %1 -+ [m, r Z ; store_4 , * ] str\\t%w1, %0 -+ [m, w ; store_4 , * ] str\t%1, %0 -+ [r, w ; neon_to_gp , base_simd ] umov\t%w0, %1.[0] -+ [r, w ; neon_to_gp , nobase_simd] fmov\t%w0, %s1 -+ [w, r Z ; neon_from_gp, simd ] dup\t%0., %w1 -+ [w, r Z ; neon_from_gp, nosimd ] fmov\t%s0, %w1 -+ [w, w ; neon_dup , simd ] dup\t%0, %1.[0] -+ [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1 - } - ) - -@@ -1372,9 +1379,9 @@ - - (define_insn "*movti_aarch64" - [(set (match_operand:TI 0 -- "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m") -+ "nonimmediate_operand" "= r,w,w,w, r,w,w,r,m,m,w,m") - (match_operand:TI 1 -- "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))] -+ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,w,m,r,Z,m,w"))] - "(register_operand (operands[0], TImode) - || aarch64_reg_or_zero (operands[1], TImode))" - "@ -@@ -1384,16 +1391,17 @@ - # - # - mov\\t%0.16b, %1.16b -+ mov\\t%Z0.d, %Z1.d - ldp\\t%0, %H0, %1 - stp\\t%1, %H1, %0 - stp\\txzr, xzr, %0 - ldr\\t%q0, %1 - str\\t%q1, %0" -- [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \ -+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q,*,\ - 
load_16,store_16,store_16,\ - load_16,store_16") -- (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4") -- (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")] -+ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4,4") -+ (set_attr "arch" "*,simd,*,*,*,simd,sve,*,*,*,fp,fp")] - ) - - ;; Split a TImode register-register or register-immediate move into -@@ -1529,13 +1537,14 @@ - - (define_insn "*mov_aarch64" - [(set (match_operand:TFD 0 -- "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") -+ "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m") - (match_operand:TFD 1 -- "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] -+ "general_operand" " w,w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] - "TARGET_FLOAT && (register_operand (operands[0], mode) - || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - mov\\t%0.16b, %1.16b -+ mov\\t%Z0.d, %Z1.d - # - # - # -@@ -1546,10 +1555,10 @@ - ldp\\t%0, %H0, %1 - stp\\t%1, %H1, %0 - stp\\txzr, xzr, %0" -- [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ -+ [(set_attr "type" "logic_reg,*,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ - f_loadd,f_stored,load_16,store_16,store_16") -- (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") -- (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")] -+ (set_attr "length" "4,4,8,8,8,4,4,4,4,4,4,4") -+ (set_attr "arch" "simd,sve,*,*,*,simd,*,*,*,*,*,*")] - ) - - (define_split -@@ -1738,7 +1747,7 @@ - (match_operand:TF 1 "aarch64_mem_pair_operand" "Ump")) - (set (match_operand:TF 2 "register_operand" "=w") - (match_operand:TF 3 "memory_operand" "m"))] -- "TARGET_SIMD -+ "TARGET_BASE_SIMD - && rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), -@@ -1788,11 +1797,11 @@ - (match_operand:TF 1 "register_operand" "w")) - (set (match_operand:TF 2 "memory_operand" "=m") - (match_operand:TF 3 "register_operand" "w"))] -- "TARGET_SIMD && -- rtx_equal_p (XEXP (operands[2], 0), -- plus_constant (Pmode, -- XEXP (operands[0], 0), -- GET_MODE_SIZE (TFmode)))" -+ "TARGET_BASE_SIMD -+ && 
rtx_equal_p (XEXP (operands[2], 0), -+ plus_constant (Pmode, -+ XEXP (operands[0], 0), -+ GET_MODE_SIZE (TFmode)))" - "stp\\t%q1, %q3, %z0" - [(set_attr "type" "neon_stp_q") - (set_attr "fp" "yes")] -@@ -1840,7 +1849,7 @@ - (set (match_operand:TX 3 "register_operand" "=w") - (mem:TX (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] -- "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (mode)" -+ "TARGET_BASE_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (mode)" - "ldp\\t%q2, %q3, [%1], %4" - [(set_attr "type" "neon_ldp_q")] - ) -@@ -1890,7 +1899,7 @@ - (set (mem:TX (plus:P (match_dup 0) - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:TX 3 "register_operand" "w"))])] -- "TARGET_SIMD -+ "TARGET_BASE_SIMD - && INTVAL (operands[5]) - == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" - "stp\\t%q2, %q3, [%0, %4]!" -diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_2.c b/gcc/testsuite/gcc.target/aarch64/movdf_2.c -new file mode 100644 -index 000000000..0d459d317 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movdf_2.c -@@ -0,0 +1,51 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+double -+fpr_to_fpr (double q0, double q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+double -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register double x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+double -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov x0, d0 -+** ret -+*/ -+void -+fpr_to_gpr (double q0) [[arm::streaming_compatible]] -+{ -+ register double x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_3.c 
b/gcc/testsuite/gcc.target/aarch64/movdi_3.c -new file mode 100644 -index 000000000..31b2cbbae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movdi_3.c -@@ -0,0 +1,59 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register uint64_t q0 asm ("q0"); -+ register uint64_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+void -+gpr_to_fpr (uint64_t x0) [[arm::streaming_compatible]] -+{ -+ register uint64_t q0 asm ("q0"); -+ q0 = x0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register uint64_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov x0, d0 -+** ret -+*/ -+uint64_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register uint64_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_2.c b/gcc/testsuite/gcc.target/aarch64/movhf_2.c -new file mode 100644 -index 000000000..3292b0de8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movhf_2.c -@@ -0,0 +1,53 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+simd" -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+_Float16 -+fpr_to_fpr (_Float16 q0, _Float16 q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+_Float16 -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register _Float16 w0 asm ("w0"); -+ asm volatile ("" : "=r" (w0)); -+ return w0; -+} -+ -+/* -+** zero_to_fpr: 
-+** fmov s0, wzr -+** ret -+*/ -+_Float16 -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+void -+fpr_to_gpr (_Float16 q0) [[arm::streaming_compatible]] -+{ -+ register _Float16 w0 asm ("w0"); -+ w0 = q0; -+ asm volatile ("" :: "r" (w0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_2.c b/gcc/testsuite/gcc.target/aarch64/movhi_2.c -new file mode 100644 -index 000000000..dbbf3486f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movhi_2.c -@@ -0,0 +1,61 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nothing+simd" -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register uint16_t q0 asm ("q0"); -+ register uint16_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint16_t w0) [[arm::streaming_compatible]] -+{ -+ register uint16_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register uint16_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** umov w0, v0.h\[0\] -+** ret -+*/ -+uint16_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register uint16_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_2.c b/gcc/testsuite/gcc.target/aarch64/movqi_2.c -new file mode 100644 -index 000000000..aec087e4e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movqi_2.c -@@ -0,0 +1,59 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ 
-+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register uint8_t q0 asm ("q0"); -+ register uint8_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint8_t w0) [[arm::streaming_compatible]] -+{ -+ register uint8_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register uint8_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** umov w0, v0.b\[0\] -+** ret -+*/ -+uint8_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register uint8_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_2.c b/gcc/testsuite/gcc.target/aarch64/movsf_2.c -new file mode 100644 -index 000000000..7fed4b22f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movsf_2.c -@@ -0,0 +1,51 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+float -+fpr_to_fpr (float q0, float q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+float -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register float w0 asm ("w0"); -+ asm volatile ("" : "=r" (w0)); -+ return w0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+float -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+void -+fpr_to_gpr (float q0) [[arm::streaming_compatible]] -+{ -+ register float w0 asm ("w0"); -+ w0 = q0; -+ asm volatile ("" :: "r" (w0)); -+} -diff --git 
a/gcc/testsuite/gcc.target/aarch64/movsi_2.c b/gcc/testsuite/gcc.target/aarch64/movsi_2.c -new file mode 100644 -index 000000000..c14d2468a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movsi_2.c -@@ -0,0 +1,59 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#include -+ -+/* -+** fpr_to_fpr: -+** fmov s0, s1 -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register uint32_t q0 asm ("q0"); -+ register uint32_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: -+** fmov s0, w0 -+** ret -+*/ -+void -+gpr_to_fpr (uint32_t w0) [[arm::streaming_compatible]] -+{ -+ register uint32_t q0 asm ("q0"); -+ q0 = w0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register uint32_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: -+** fmov w0, s0 -+** ret -+*/ -+uint32_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register uint32_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_3.c b/gcc/testsuite/gcc.target/aarch64/movtf_3.c -new file mode 100644 -index 000000000..dd164a418 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movtf_3.c -@@ -0,0 +1,81 @@ -+/* { dg-do assemble } */ -+/* { dg-require-effective-target large_long_double } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosve" -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+long double -+fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target 
aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+long double -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register long double x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+long double -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (long double q0) [[arm::streaming_compatible]] -+{ -+ register long double x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_4.c b/gcc/testsuite/gcc.target/aarch64/movtf_4.c -new file mode 100644 -index 000000000..faf9703e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movtf_4.c -@@ -0,0 +1,78 @@ -+/* { dg-do assemble } */ -+/* { dg-require-effective-target large_long_double } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+sve" -+ -+/* -+** fpr_to_fpr: -+** mov z0.d, z1.d -+** ret -+*/ -+long double -+fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+long double -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register long double x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} 
-+ -+/* -+** zero_to_fpr: -+** fmov s0, wzr -+** ret -+*/ -+long double -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return 0; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (long double q0) [[arm::streaming_compatible]] -+{ -+ register long double x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movti_3.c b/gcc/testsuite/gcc.target/aarch64/movti_3.c -new file mode 100644 -index 000000000..243109181 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movti_3.c -@@ -0,0 +1,86 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosve" -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ register __int128_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+void -+gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = x0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** 
fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+__int128_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movti_4.c b/gcc/testsuite/gcc.target/aarch64/movti_4.c -new file mode 100644 -index 000000000..a70feccb0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movti_4.c -@@ -0,0 +1,83 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+sve" -+ -+/* -+** fpr_to_fpr: -+** mov z0\.d, z1\.d -+** ret -+*/ -+void -+fpr_to_fpr (void) [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ register __int128_t q1 asm ("q1"); -+ asm volatile ("" : "=w" (q1)); -+ q0 = q1; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+void -+gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = x0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+void -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ q0 = 0; -+ asm volatile ("" :: "w" (q0)); -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** fmov x0, d0 -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** fmov x0, d0 -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** 
fmov x1, d0 -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** fmov x1, d0 -+** ) -+** ret -+*/ -+__int128_t -+fpr_to_gpr () [[arm::streaming_compatible]] -+{ -+ register __int128_t q0 asm ("q0"); -+ asm volatile ("" : "=w" (q0)); -+ return q0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c -new file mode 100644 -index 000000000..7bec888b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c -@@ -0,0 +1,82 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosve" -+ -+typedef unsigned char v16qi __attribute__((vector_size(16))); -+ -+/* -+** fpr_to_fpr: -+** sub sp, sp, #16 -+** str q1, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, #?16 -+** ret -+*/ -+v16qi -+fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+v16qi -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register v16qi x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v16qi -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return (v16qi) {}; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** umov x0, v0.d\[0\] -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** umov x0, v0.d\[0\] -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** umov x1, v0.d\[0\] -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** umov x1, v0.d\[0\] -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]] -+{ -+ register v16qi x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff 
--git a/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c -new file mode 100644 -index 000000000..2d36342b3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c -@@ -0,0 +1,79 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+sve" -+ -+typedef unsigned char v16qi __attribute__((vector_size(16))); -+ -+/* -+** fpr_to_fpr: -+** mov z0.d, z1.d -+** ret -+*/ -+v16qi -+fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: { target aarch64_little_endian } -+** fmov d0, x0 -+** fmov v0.d\[1\], x1 -+** ret -+*/ -+/* -+** gpr_to_fpr: { target aarch64_big_endian } -+** fmov d0, x1 -+** fmov v0.d\[1\], x0 -+** ret -+*/ -+v16qi -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register v16qi x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v16qi -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return (v16qi) {}; -+} -+ -+/* -+** fpr_to_gpr: { target aarch64_little_endian } -+** ( -+** umov x0, v0.d\[0\] -+** fmov x1, v0.d\[1\] -+** | -+** fmov x1, v0.d\[1\] -+** umov x0, v0.d\[0\] -+** ) -+** ret -+*/ -+/* -+** fpr_to_gpr: { target aarch64_big_endian } -+** ( -+** umov x1, v0.d\[0\] -+** fmov x0, v0.d\[1\] -+** | -+** fmov x0, v0.d\[1\] -+** umov x1, v0.d\[0\] -+** ) -+** ret -+*/ -+void -+fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]] -+{ -+ register v16qi x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c -new file mode 100644 -index 000000000..12ae25a3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c -@@ -0,0 +1,55 @@ -+/* { dg-do assemble } */ -+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */ -+/* { 
dg-final { check-function-bodies "**" "" "" } } */ -+ -+#pragma GCC target "+nosve" -+ -+typedef unsigned char v8qi __attribute__((vector_size(8))); -+ -+/* -+** fpr_to_fpr: -+** fmov d0, d1 -+** ret -+*/ -+v8qi -+fpr_to_fpr (v8qi q0, v8qi q1) [[arm::streaming_compatible]] -+{ -+ return q1; -+} -+ -+/* -+** gpr_to_fpr: -+** fmov d0, x0 -+** ret -+*/ -+v8qi -+gpr_to_fpr () [[arm::streaming_compatible]] -+{ -+ register v8qi x0 asm ("x0"); -+ asm volatile ("" : "=r" (x0)); -+ return x0; -+} -+ -+/* -+** zero_to_fpr: -+** fmov d0, xzr -+** ret -+*/ -+v8qi -+zero_to_fpr () [[arm::streaming_compatible]] -+{ -+ return (v8qi) {}; -+} -+ -+/* -+** fpr_to_gpr: -+** umov x0, v0\.d\[0\] -+** ret -+*/ -+void -+fpr_to_gpr (v8qi q0) [[arm::streaming_compatible]] -+{ -+ register v8qi x0 asm ("x0"); -+ x0 = q0; -+ asm volatile ("" :: "r" (x0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c -new file mode 100644 -index 000000000..5b5346cf4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c -@@ -0,0 +1,13 @@ -+// { dg-options "" } -+ -+#include -+ -+#pragma GCC target "+nosme" -+ -+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } -+ -+int32x4_t -+foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]] -+{ -+ return vhaddq_s32 (x, y); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c -new file mode 100644 -index 000000000..2092c4471 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c -@@ -0,0 +1,11 @@ -+// { dg-options "" } -+ -+#include -+ -+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } -+ -+int32x4_t -+foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]] -+{ -+ return vhaddq_s32 (x, y); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c -new file mode 100644 -index 
000000000..36794e5b0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c -@@ -0,0 +1,11 @@ -+// { dg-options "" } -+ -+#include -+ -+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 } -+ -+int32x4_t -+foo (int32x4_t x, int32x4_t y) [[arm::streaming]] -+{ -+ return vhaddq_s32 (x, y); -+} --- -2.33.0 - diff --git a/0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch b/0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch deleted file mode 100644 index 679e26e..0000000 --- a/0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch +++ /dev/null @@ -1,4506 +0,0 @@ -From 0404dfa43633a35460aba1b96d04f62cc7d6103b Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:24 +0000 -Subject: [PATCH 081/157] [Backport][SME] aarch64: Mark relevant SVE - instructions as non-streaming - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=983b4365028e9a059b5fb1eef85a297bea19fc8e - -Following on from the previous Advanced SIMD patch, this one -divides SVE instructions into non-streaming and streaming- -compatible groups. - -gcc/ - * config/aarch64/aarch64.h (TARGET_NON_STREAMING): New macro. - (TARGET_SVE2_AES, TARGET_SVE2_BITPERM): Use it. - (TARGET_SVE2_SHA3, TARGET_SVE2_SM4): Likewise. - * config/aarch64/aarch64-sve-builtins-base.def: Separate out - the functions that require PSTATE.SM to be 0 and guard them - with AARCH64_FL_SM_OFF. - * config/aarch64/aarch64-sve-builtins-sve2.def: Likewise. - * config/aarch64/aarch64-sve-builtins.cc (check_required_extensions): - Enforce AARCH64_FL_SM_OFF requirements. - * config/aarch64/aarch64-sve.md (aarch64_wrffr): Require - TARGET_NON_STREAMING - (aarch64_rdffr, aarch64_rdffr_z, *aarch64_rdffr_z_ptest): Likewise. - (*aarch64_rdffr_ptest, *aarch64_rdffr_z_cc, *aarch64_rdffr_cc) - (@aarch64_ldf1): Likewise. - (@aarch64_ldf1_) - (gather_load): Likewise - (mask_gather_load): Likewise. - (mask_gather_load): Likewise. 
- (*mask_gather_load_xtw_unpacked): Likewise. - (*mask_gather_load_sxtw): Likewise. - (*mask_gather_load_uxtw): Likewise. - (@aarch64_gather_load_) - (@aarch64_gather_load_ - ): Likewise. - (*aarch64_gather_load_ - _xtw_unpacked) - (*aarch64_gather_load_ - _sxtw): Likewise. - (*aarch64_gather_load_ - _uxtw): Likewise. - (@aarch64_ldff1_gather, @aarch64_ldff1_gather): Likewise. - (*aarch64_ldff1_gather_sxtw): Likewise. - (*aarch64_ldff1_gather_uxtw): Likewise. - (@aarch64_ldff1_gather_ - ): Likewise. - (@aarch64_ldff1_gather_ - ): Likewise. - (*aarch64_ldff1_gather_ - _sxtw): Likewise. - (*aarch64_ldff1_gather_ - _uxtw): Likewise. - (@aarch64_sve_gather_prefetch) - (@aarch64_sve_gather_prefetch) - (*aarch64_sve_gather_prefetch_sxtw) - (*aarch64_sve_gather_prefetch_uxtw) - (scatter_store): Likewise. - (mask_scatter_store): Likewise. - (*mask_scatter_store_xtw_unpacked) - (*mask_scatter_store_sxtw): Likewise. - (*mask_scatter_store_uxtw): Likewise. - (@aarch64_scatter_store_trunc) - (@aarch64_scatter_store_trunc) - (*aarch64_scatter_store_trunc_sxtw) - (*aarch64_scatter_store_trunc_uxtw) - (@aarch64_sve_ld1ro, @aarch64_adr): Likewise. - (*aarch64_adr_sxtw, *aarch64_adr_uxtw_unspec): Likewise. - (*aarch64_adr_uxtw_and, @aarch64_adr_shift): Likewise. - (*aarch64_adr_shift, *aarch64_adr_shift_sxtw): Likewise. - (*aarch64_adr_shift_uxtw, @aarch64_sve_add_): Likewise. - (@aarch64_sve_, fold_left_plus_): Likewise. - (mask_fold_left_plus_, @aarch64_sve_compact): Likewise. - * config/aarch64/aarch64-sve2.md (@aarch64_gather_ldnt) - (@aarch64_gather_ldnt_ - ): Likewise. - (@aarch64_sve2_histcnt, @aarch64_sve2_histseg): Likewise. - (@aarch64_pred_): Likewise. - (*aarch64_pred__cc): Likewise. - (*aarch64_pred__ptest): Likewise. - * config/aarch64/iterators.md (SVE_FP_UNARY_INT): Make FEXPA - depend on TARGET_NON_STREAMING. - (SVE_BFLOAT_TERNARY_LONG): Likewise BFMMLA. - -gcc/testsuite/ - * g++.target/aarch64/sve/aarch64-ssve.exp: New harness. 
- * g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Add - -DSTREAMING_COMPATIBLE to the list of options. - * g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. - * gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp: Likewise. - * gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp: Likewise. - Fix pasto in variable name. - * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Mark functions - as streaming-compatible if STREAMING_COMPATIBLE is defined. - * gcc.target/aarch64/sve/acle/asm/adda_f16.c: Disable for - streaming-compatible code. - * gcc.target/aarch64/sve/acle/asm/adda_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/adda_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/adrb.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/adrd.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/adrh.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/adrw.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/compact_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/expa_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/expa_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/expa_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_s8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1_u8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/mmla_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/mmla_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/mmla_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/mmla_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfb_gather.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfd_gather.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfh_gather.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/prfw_gather.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/rdffr_1.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tmad_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tmad_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tmad_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tsmul_f16.c: Likewise. 
- * gcc.target/aarch64/sve/acle/asm/tsmul_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tsmul_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tssel_f16.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tssel_f32.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/tssel_f64.c: Likewise. - * gcc.target/aarch64/sve/acle/asm/usmmla_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/aesd_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/aese_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bdep_u16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bdep_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bdep_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bdep_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bext_u16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bext_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bext_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bext_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histseg_s8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/histseg_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c: Likewise. 
- * gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/match_s16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/match_s8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/match_u16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/match_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c: Likewise. 
- * gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/rax1_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/rax1_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c: Likewise. - * gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c: Likewise. 
---- - .../aarch64/aarch64-sve-builtins-base.def | 158 +++++---- - .../aarch64/aarch64-sve-builtins-sve2.def | 63 ++-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 7 + - gcc/config/aarch64/aarch64-sve.md | 124 +++---- - gcc/config/aarch64/aarch64-sve2.md | 14 +- - gcc/config/aarch64/aarch64.h | 11 +- - gcc/config/aarch64/iterators.md | 4 +- - .../g++.target/aarch64/sve/aarch64-ssve.exp | 308 ++++++++++++++++++ - .../aarch64/sve/acle/aarch64-sve-acle-asm.exp | 1 + - .../sve2/acle/aarch64-sve2-acle-asm.exp | 1 + - .../aarch64/sve/acle/aarch64-sve-acle-asm.exp | 1 + - .../aarch64/sve/acle/asm/adda_f16.c | 1 + - .../aarch64/sve/acle/asm/adda_f32.c | 1 + - .../aarch64/sve/acle/asm/adda_f64.c | 1 + - .../gcc.target/aarch64/sve/acle/asm/adrb.c | 1 + - .../gcc.target/aarch64/sve/acle/asm/adrd.c | 1 + - .../gcc.target/aarch64/sve/acle/asm/adrh.c | 1 + - .../gcc.target/aarch64/sve/acle/asm/adrw.c | 1 + - .../aarch64/sve/acle/asm/bfmmla_f32.c | 1 + - .../aarch64/sve/acle/asm/compact_f32.c | 1 + - .../aarch64/sve/acle/asm/compact_f64.c | 1 + - .../aarch64/sve/acle/asm/compact_s32.c | 1 + - .../aarch64/sve/acle/asm/compact_s64.c | 1 + - .../aarch64/sve/acle/asm/compact_u32.c | 1 + - .../aarch64/sve/acle/asm/compact_u64.c | 1 + - .../aarch64/sve/acle/asm/expa_f16.c | 1 + - .../aarch64/sve/acle/asm/expa_f32.c | 1 + - .../aarch64/sve/acle/asm/expa_f64.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_f32.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_f64.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_bf16.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_f16.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_f32.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_f64.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_s16.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_s64.c | 1 + - 
.../aarch64/sve/acle/asm/ld1ro_s8.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_u16.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1ro_u8.c | 1 + - .../aarch64/sve/acle/asm/ld1sb_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1sb_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1sb_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1sb_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1sh_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1sh_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1sh_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1sh_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1sw_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1sw_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1ub_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1ub_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1ub_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1ub_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1uh_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ld1uh_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1uh_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ld1uh_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ld1uw_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ld1uw_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_bf16.c | 1 + - .../aarch64/sve/acle/asm/ldff1_f16.c | 1 + - .../aarch64/sve/acle/asm/ldff1_f32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_f64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_f32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_f64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_s16.c | 1 + - .../aarch64/sve/acle/asm/ldff1_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_s8.c | 1 + - .../aarch64/sve/acle/asm/ldff1_u16.c | 1 + - 
.../aarch64/sve/acle/asm/ldff1_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1_u8.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_s16.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_u16.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sb_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1sh_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sw_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sw_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sw_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1sw_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_s16.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_u16.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1ub_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_gather_s32.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_gather_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_s32.c | 1 + - 
.../aarch64/sve/acle/asm/ldff1uh_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_u32.c | 1 + - .../aarch64/sve/acle/asm/ldff1uh_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uw_gather_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uw_gather_u64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uw_s64.c | 1 + - .../aarch64/sve/acle/asm/ldff1uw_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_bf16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_f16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_f32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_f64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_s16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_s32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_s8.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_u16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_u32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1_u8.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_s16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_s32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_u16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_u32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sb_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sh_s32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sh_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sh_u32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sh_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sw_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1sw_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_s16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_s32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_u16.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_u32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1ub_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1uh_s32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1uh_s64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1uh_u32.c | 1 + - .../aarch64/sve/acle/asm/ldnf1uh_u64.c | 1 + - .../aarch64/sve/acle/asm/ldnf1uw_s64.c | 1 + - 
.../aarch64/sve/acle/asm/ldnf1uw_u64.c | 1 + - .../aarch64/sve/acle/asm/mmla_f32.c | 1 + - .../aarch64/sve/acle/asm/mmla_f64.c | 1 + - .../aarch64/sve/acle/asm/mmla_s32.c | 1 + - .../aarch64/sve/acle/asm/mmla_u32.c | 1 + - .../aarch64/sve/acle/asm/prfb_gather.c | 1 + - .../aarch64/sve/acle/asm/prfd_gather.c | 1 + - .../aarch64/sve/acle/asm/prfh_gather.c | 1 + - .../aarch64/sve/acle/asm/prfw_gather.c | 1 + - .../gcc.target/aarch64/sve/acle/asm/rdffr_1.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_f32.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_f64.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_s32.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_s64.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_u32.c | 1 + - .../aarch64/sve/acle/asm/st1_scatter_u64.c | 1 + - .../aarch64/sve/acle/asm/st1b_scatter_s32.c | 1 + - .../aarch64/sve/acle/asm/st1b_scatter_s64.c | 1 + - .../aarch64/sve/acle/asm/st1b_scatter_u32.c | 1 + - .../aarch64/sve/acle/asm/st1b_scatter_u64.c | 1 + - .../aarch64/sve/acle/asm/st1h_scatter_s32.c | 1 + - .../aarch64/sve/acle/asm/st1h_scatter_s64.c | 1 + - .../aarch64/sve/acle/asm/st1h_scatter_u32.c | 1 + - .../aarch64/sve/acle/asm/st1h_scatter_u64.c | 1 + - .../aarch64/sve/acle/asm/st1w_scatter_s64.c | 1 + - .../aarch64/sve/acle/asm/st1w_scatter_u64.c | 1 + - .../aarch64/sve/acle/asm/test_sve_acle.h | 11 +- - .../aarch64/sve/acle/asm/tmad_f16.c | 1 + - .../aarch64/sve/acle/asm/tmad_f32.c | 1 + - .../aarch64/sve/acle/asm/tmad_f64.c | 1 + - .../aarch64/sve/acle/asm/tsmul_f16.c | 1 + - .../aarch64/sve/acle/asm/tsmul_f32.c | 1 + - .../aarch64/sve/acle/asm/tsmul_f64.c | 1 + - .../aarch64/sve/acle/asm/tssel_f16.c | 1 + - .../aarch64/sve/acle/asm/tssel_f32.c | 1 + - .../aarch64/sve/acle/asm/tssel_f64.c | 1 + - .../aarch64/sve/acle/asm/usmmla_s32.c | 1 + - .../sve2/acle/aarch64-sve2-acle-asm.exp | 1 + - .../aarch64/sve2/acle/asm/aesd_u8.c | 1 + - .../aarch64/sve2/acle/asm/aese_u8.c | 1 + - .../aarch64/sve2/acle/asm/aesimc_u8.c | 1 + - 
.../aarch64/sve2/acle/asm/aesmc_u8.c | 1 + - .../aarch64/sve2/acle/asm/bdep_u16.c | 1 + - .../aarch64/sve2/acle/asm/bdep_u32.c | 1 + - .../aarch64/sve2/acle/asm/bdep_u64.c | 1 + - .../aarch64/sve2/acle/asm/bdep_u8.c | 1 + - .../aarch64/sve2/acle/asm/bext_u16.c | 1 + - .../aarch64/sve2/acle/asm/bext_u32.c | 1 + - .../aarch64/sve2/acle/asm/bext_u64.c | 1 + - .../aarch64/sve2/acle/asm/bext_u8.c | 1 + - .../aarch64/sve2/acle/asm/bgrp_u16.c | 1 + - .../aarch64/sve2/acle/asm/bgrp_u32.c | 1 + - .../aarch64/sve2/acle/asm/bgrp_u64.c | 1 + - .../aarch64/sve2/acle/asm/bgrp_u8.c | 1 + - .../aarch64/sve2/acle/asm/histcnt_s32.c | 1 + - .../aarch64/sve2/acle/asm/histcnt_s64.c | 1 + - .../aarch64/sve2/acle/asm/histcnt_u32.c | 1 + - .../aarch64/sve2/acle/asm/histcnt_u64.c | 1 + - .../aarch64/sve2/acle/asm/histseg_s8.c | 1 + - .../aarch64/sve2/acle/asm/histseg_u8.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_f32.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_f64.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_s32.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_s64.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_u32.c | 1 + - .../aarch64/sve2/acle/asm/ldnt1_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1sb_gather_s32.c | 1 + - .../sve2/acle/asm/ldnt1sb_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1sb_gather_u32.c | 1 + - .../sve2/acle/asm/ldnt1sb_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1sh_gather_s32.c | 1 + - .../sve2/acle/asm/ldnt1sh_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1sh_gather_u32.c | 1 + - .../sve2/acle/asm/ldnt1sh_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1sw_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1sw_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1ub_gather_s32.c | 1 + - .../sve2/acle/asm/ldnt1ub_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1ub_gather_u32.c | 1 + - .../sve2/acle/asm/ldnt1ub_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1uh_gather_s32.c | 1 + - .../sve2/acle/asm/ldnt1uh_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1uh_gather_u32.c | 1 + - 
.../sve2/acle/asm/ldnt1uh_gather_u64.c | 1 + - .../sve2/acle/asm/ldnt1uw_gather_s64.c | 1 + - .../sve2/acle/asm/ldnt1uw_gather_u64.c | 1 + - .../aarch64/sve2/acle/asm/match_s16.c | 1 + - .../aarch64/sve2/acle/asm/match_s8.c | 1 + - .../aarch64/sve2/acle/asm/match_u16.c | 1 + - .../aarch64/sve2/acle/asm/match_u8.c | 1 + - .../aarch64/sve2/acle/asm/nmatch_s16.c | 1 + - .../aarch64/sve2/acle/asm/nmatch_s8.c | 1 + - .../aarch64/sve2/acle/asm/nmatch_u16.c | 1 + - .../aarch64/sve2/acle/asm/nmatch_u8.c | 1 + - .../aarch64/sve2/acle/asm/pmullb_pair_u64.c | 1 + - .../aarch64/sve2/acle/asm/pmullt_pair_u64.c | 1 + - .../aarch64/sve2/acle/asm/rax1_s64.c | 1 + - .../aarch64/sve2/acle/asm/rax1_u64.c | 1 + - .../aarch64/sve2/acle/asm/sm4e_u32.c | 1 + - .../aarch64/sve2/acle/asm/sm4ekey_u32.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_f32.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_f64.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_s32.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_s64.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_u32.c | 1 + - .../aarch64/sve2/acle/asm/stnt1_scatter_u64.c | 1 + - .../sve2/acle/asm/stnt1b_scatter_s32.c | 1 + - .../sve2/acle/asm/stnt1b_scatter_s64.c | 1 + - .../sve2/acle/asm/stnt1b_scatter_u32.c | 1 + - .../sve2/acle/asm/stnt1b_scatter_u64.c | 1 + - .../sve2/acle/asm/stnt1h_scatter_s32.c | 1 + - .../sve2/acle/asm/stnt1h_scatter_s64.c | 1 + - .../sve2/acle/asm/stnt1h_scatter_u32.c | 1 + - .../sve2/acle/asm/stnt1h_scatter_u64.c | 1 + - .../sve2/acle/asm/stnt1w_scatter_s64.c | 1 + - .../sve2/acle/asm/stnt1w_scatter_u64.c | 1 + - 279 files changed, 805 insertions(+), 165 deletions(-) - create mode 100644 gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def -index 756469959..e732b4792 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-base.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def -@@ -25,12 +25,7 
@@ DEF_SVE_FUNCTION (svacgt, compare_opt_n, all_float, implicit) - DEF_SVE_FUNCTION (svacle, compare_opt_n, all_float, implicit) - DEF_SVE_FUNCTION (svaclt, compare_opt_n, all_float, implicit) - DEF_SVE_FUNCTION (svadd, binary_opt_n, all_arith, mxz) --DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) - DEF_SVE_FUNCTION (svaddv, reduction_wide, all_arith, implicit) --DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) --DEF_SVE_FUNCTION (svadrd, adr_index, none, none) --DEF_SVE_FUNCTION (svadrh, adr_index, none, none) --DEF_SVE_FUNCTION (svadrw, adr_index, none, none) - DEF_SVE_FUNCTION (svand, binary_opt_n, all_integer, mxz) - DEF_SVE_FUNCTION (svand, binary_opt_n, b, z) - DEF_SVE_FUNCTION (svandv, reduction, all_integer, implicit) -@@ -75,7 +70,6 @@ DEF_SVE_FUNCTION (svcnth_pat, count_pat, none, none) - DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) - DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) - DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) --DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) - DEF_SVE_FUNCTION (svcreate2, create, all_data, none) - DEF_SVE_FUNCTION (svcreate3, create, all_data, none) - DEF_SVE_FUNCTION (svcreate4, create, all_data, none) -@@ -93,7 +87,6 @@ DEF_SVE_FUNCTION (svdupq_lane, binary_uint64_n, all_data, none) - DEF_SVE_FUNCTION (sveor, binary_opt_n, all_integer, mxz) - DEF_SVE_FUNCTION (sveor, binary_opt_n, b, z) - DEF_SVE_FUNCTION (sveorv, reduction, all_integer, implicit) --DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) - DEF_SVE_FUNCTION (svext, ext, all_data, none) - DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) - DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) -@@ -106,51 +99,13 @@ DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none) - DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit) - DEF_SVE_FUNCTION (svlastb, reduction, all_data, implicit) - DEF_SVE_FUNCTION (svld1, load, all_data, implicit) --DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) 
--DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) - DEF_SVE_FUNCTION (svld1rq, load_replicate, all_data, implicit) - DEF_SVE_FUNCTION (svld1sb, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) - DEF_SVE_FUNCTION (svld1sh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) - DEF_SVE_FUNCTION (svld1sw, load_ext, d_integer, implicit) --DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) --DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) - DEF_SVE_FUNCTION (svld1ub, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) - DEF_SVE_FUNCTION (svld1uh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) - DEF_SVE_FUNCTION (svld1uw, load_ext, d_integer, implicit) --DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) --DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) --DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) --DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) --DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_index, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1sw_gather, 
load_ext_gather_offset, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) --DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) --DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) --DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) --DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) --DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) --DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) - DEF_SVE_FUNCTION (svldnt1, load, all_data, implicit) - DEF_SVE_FUNCTION (svld2, load, all_data, implicit) - DEF_SVE_FUNCTION (svld3, load, all_data, implicit) -@@ -173,7 +128,6 @@ DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none) - DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none) --DEF_SVE_FUNCTION (svmmla, mmla, none, none) - DEF_SVE_FUNCTION (svmov, unary, b, z) - DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svmul, binary_opt_n, all_arith, mxz) -@@ -197,13 +151,9 @@ DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) - DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) - DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) - DEF_SVE_FUNCTION (svprfb, 
prefetch, none, implicit) --DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) - DEF_SVE_FUNCTION (svprfd, prefetch, none, implicit) --DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) - DEF_SVE_FUNCTION (svprfh, prefetch, none, implicit) --DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) - DEF_SVE_FUNCTION (svprfw, prefetch, none, implicit) --DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) - DEF_SVE_FUNCTION (svptest_any, ptest, none, implicit) - DEF_SVE_FUNCTION (svptest_first, ptest, none, implicit) - DEF_SVE_FUNCTION (svptest_last, ptest, none, implicit) -@@ -244,7 +194,6 @@ DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, s_integer, none) - DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, sd_integer, none) - DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, none) - DEF_SVE_FUNCTION (svrbit, unary, all_integer, mxz) --DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) - DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) - DEF_SVE_FUNCTION (svrecps, binary, all_float, none) - DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) -@@ -269,20 +218,12 @@ DEF_SVE_FUNCTION (svsel, binary, b, implicit) - DEF_SVE_FUNCTION (svset2, set, all_data, none) - DEF_SVE_FUNCTION (svset3, set, all_data, none) - DEF_SVE_FUNCTION (svset4, set, all_data, none) --DEF_SVE_FUNCTION (svsetffr, setffr, none, none) - DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) - DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) - DEF_SVE_FUNCTION (svst1, store, all_data, implicit) --DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) --DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) - DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) --DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) - DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) --DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) 
--DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) - DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) --DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) --DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) - DEF_SVE_FUNCTION (svst2, store, all_data, implicit) - DEF_SVE_FUNCTION (svst3, store, all_data, implicit) - DEF_SVE_FUNCTION (svst4, store, all_data, implicit) -@@ -290,13 +231,10 @@ DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) - DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) - DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) --DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) - DEF_SVE_FUNCTION (svtrn1, binary, all_data, none) - DEF_SVE_FUNCTION (svtrn1, binary_pred, all_pred, none) - DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) - DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) --DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) --DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) - DEF_SVE_FUNCTION (svundef, inherent, all_data, none) - DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) - DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) -@@ -311,13 +249,78 @@ DEF_SVE_FUNCTION (svuzp2, binary, all_data, none) - DEF_SVE_FUNCTION (svuzp2, binary_pred, all_pred, none) - DEF_SVE_FUNCTION (svwhilele, compare_scalar, while, none) - DEF_SVE_FUNCTION (svwhilelt, compare_scalar, while, none) --DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit) - DEF_SVE_FUNCTION (svzip1, binary, all_data, none) - DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) - DEF_SVE_FUNCTION (svzip2, binary, all_data, none) - DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) - #undef REQUIRED_EXTENSIONS - -+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SM_OFF -+DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) -+DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) 
-+DEF_SVE_FUNCTION (svadrd, adr_index, none, none) -+DEF_SVE_FUNCTION (svadrh, adr_index, none, none) -+DEF_SVE_FUNCTION (svadrw, adr_index, none, none) -+DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) -+DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) -+DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) -+DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) -+DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) -+DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) -+DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, 
d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svmmla, mmla, none, none) -+DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) -+DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) -+DEF_SVE_FUNCTION (svsetffr, setffr, none, none) -+DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) -+DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) -+DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) -+DEF_SVE_FUNCTION 
(svst1w_scatter, store_scatter_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) -+DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) -+DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) -+DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit) -+#undef REQUIRED_EXTENSIONS -+ - #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16 - DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) - DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) -@@ -325,27 +328,37 @@ DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) - DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) - DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) - DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) --DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) - DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) - DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) - #undef REQUIRED_EXTENSIONS - -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_BF16 \ -+ | AARCH64_FL_SM_OFF) -+DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) -+#undef REQUIRED_EXTENSIONS -+ - #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM --DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) --DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) - DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) - DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none) - DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) - DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) - #undef REQUIRED_EXTENSIONS - --#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_I8MM \ -+ | AARCH64_FL_SM_OFF) -+DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) -+DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, 
none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_F32MM \ -+ | AARCH64_FL_SM_OFF) - DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) - #undef REQUIRED_EXTENSIONS - - #define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM --DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) --DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) - DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) - DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none) - DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none) -@@ -353,3 +366,10 @@ DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none) - DEF_SVE_FUNCTION (svzip1q, binary, all_data, none) - DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) - #undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_F64MM \ -+ | AARCH64_FL_SM_OFF) -+DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) -+DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) -+#undef REQUIRED_EXTENSIONS -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -index d5f23a887..3c0a0e072 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def -@@ -51,24 +51,9 @@ DEF_SVE_FUNCTION (sveor3, ternary_opt_n, all_integer, none) - DEF_SVE_FUNCTION (sveorbt, ternary_opt_n, all_integer, none) - DEF_SVE_FUNCTION (sveortb, ternary_opt_n, all_integer, none) - DEF_SVE_FUNCTION (svhadd, binary_opt_n, all_integer, mxz) --DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z) --DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none) - DEF_SVE_FUNCTION (svhsub, binary_opt_n, all_integer, mxz) - DEF_SVE_FUNCTION (svhsubr, binary_opt_n, all_integer, mxz) --DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit) --DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit) --DEF_SVE_FUNCTION (svldnt1sb_gather, 
load_ext_gather_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit) --DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit) --DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit) --DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_index_restricted, d_integer, implicit) - DEF_SVE_FUNCTION (svlogb, unary_to_int, all_float, mxz) --DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit) - DEF_SVE_FUNCTION (svmaxp, binary, all_arith, mx) - DEF_SVE_FUNCTION (svmaxnmp, binary, all_float, mx) - DEF_SVE_FUNCTION (svmla_lane, ternary_lane, hsd_integer, none) -@@ -91,7 +76,6 @@ DEF_SVE_FUNCTION (svmullb_lane, binary_long_lane, sd_integer, none) - DEF_SVE_FUNCTION (svmullt, binary_long_opt_n, hsd_integer, none) - DEF_SVE_FUNCTION (svmullt_lane, binary_long_lane, sd_integer, none) - DEF_SVE_FUNCTION (svnbsl, ternary_opt_n, all_integer, none) --DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit) - DEF_SVE_FUNCTION (svpmul, binary_opt_n, b_unsigned, none) - DEF_SVE_FUNCTION (svpmullb, binary_long_opt_n, hd_unsigned, none) - DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, bs_unsigned, none) -@@ -164,13 +148,6 @@ DEF_SVE_FUNCTION (svsli, ternary_shift_left_imm, all_integer, none) - DEF_SVE_FUNCTION (svsqadd, binary_int_opt_n, all_unsigned, mxz) - DEF_SVE_FUNCTION (svsra, ternary_shift_right_imm, all_integer, none) - DEF_SVE_FUNCTION (svsri, 
ternary_shift_right_imm, all_integer, none) --DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit) --DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit) --DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit) --DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit) --DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit) - DEF_SVE_FUNCTION (svsubhnb, binary_narrowb_opt_n, hsd_integer, none) - DEF_SVE_FUNCTION (svsubhnt, binary_narrowt_opt_n, hsd_integer, none) - DEF_SVE_FUNCTION (svsublb, binary_long_opt_n, hsd_integer, none) -@@ -191,7 +168,36 @@ DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none) - - #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ - | AARCH64_FL_SVE2 \ -- | AARCH64_FL_SVE2_AES) -+ | AARCH64_FL_SM_OFF) -+DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z) -+DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none) -+DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit) -+DEF_SVE_FUNCTION (svldnt1_gather, load_gather_vs, sd_data, implicit) -+DEF_SVE_FUNCTION (svldnt1sb_gather, load_ext_gather_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1sh_gather, load_ext_gather_index_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_offset_restricted, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1sw_gather, load_ext_gather_index_restricted, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1ub_gather, load_ext_gather_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1uh_gather, 
load_ext_gather_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1uh_gather, load_ext_gather_index_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_offset_restricted, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1uw_gather, load_ext_gather_index_restricted, d_integer, implicit) -+DEF_SVE_FUNCTION (svmatch, compare, bh_integer, implicit) -+DEF_SVE_FUNCTION (svnmatch, compare, bh_integer, implicit) -+DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_index_restricted, sd_data, implicit) -+DEF_SVE_FUNCTION (svstnt1_scatter, store_scatter_offset_restricted, sd_data, implicit) -+DEF_SVE_FUNCTION (svstnt1b_scatter, store_scatter_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_index_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svstnt1h_scatter, store_scatter_offset_restricted, sd_integer, implicit) -+DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, implicit) -+DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ -+ | AARCH64_FL_SVE2 \ -+ | AARCH64_FL_SVE2_AES \ -+ | AARCH64_FL_SM_OFF) - DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none) - DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none) - DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none) -@@ -202,7 +208,8 @@ DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none) - - #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ - | AARCH64_FL_SVE2 \ -- | AARCH64_FL_SVE2_BITPERM) -+ | AARCH64_FL_SVE2_BITPERM \ -+ | AARCH64_FL_SM_OFF) - DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none) - DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none) - DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) -@@ -210,13 +217,15 @@ DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none) - - #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ - | AARCH64_FL_SVE2 \ -- 
| AARCH64_FL_SVE2_SHA3) -+ | AARCH64_FL_SVE2_SHA3 \ -+ | AARCH64_FL_SM_OFF) - DEF_SVE_FUNCTION (svrax1, binary, d_integer, none) - #undef REQUIRED_EXTENSIONS - - #define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \ - | AARCH64_FL_SVE2 \ -- | AARCH64_FL_SVE2_SM4) -+ | AARCH64_FL_SVE2_SM4 \ -+ | AARCH64_FL_SM_OFF) - DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none) - DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none) - #undef REQUIRED_EXTENSIONS -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index c439f2e8a..5f3a2baea 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -738,6 +738,13 @@ check_required_extensions (location_t location, tree fndecl, - if (missing_extensions == 0) - return check_required_registers (location, fndecl); - -+ if (missing_extensions & AARCH64_FL_SM_OFF) -+ { -+ error_at (location, "ACLE function %qD cannot be called when" -+ " SME streaming mode is enabled", fndecl); -+ return false; -+ } -+ - static const struct { - aarch64_feature_flags flag; - const char *name; -diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md -index 28b73d807..a8a5dc3a2 100644 ---- a/gcc/config/aarch64/aarch64-sve.md -+++ b/gcc/config/aarch64/aarch64-sve.md -@@ -1082,7 +1082,7 @@ - (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) - (set (reg:VNx16BI FFRT_REGNUM) - (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - setffr - wrffr\t%0.b" -@@ -1123,7 +1123,7 @@ - (define_insn "aarch64_rdffr" - [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") - (reg:VNx16BI FFRT_REGNUM))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffr\t%0.b" - ) - -@@ -1133,7 +1133,7 @@ - (and:VNx16BI - (reg:VNx16BI FFRT_REGNUM) - (match_operand:VNx16BI 1 "register_operand" "Upa")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffr\t%0.b, 
%1/z" - ) - -@@ -1149,7 +1149,7 @@ - (match_dup 1))] - UNSPEC_PTEST)) - (clobber (match_scratch:VNx16BI 0 "=Upa"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffrs\t%0.b, %1/z" - ) - -@@ -1163,7 +1163,7 @@ - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_PTEST)) - (clobber (match_scratch:VNx16BI 0 "=Upa"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffrs\t%0.b, %1/z" - ) - -@@ -1182,7 +1182,7 @@ - (and:VNx16BI - (reg:VNx16BI FFRT_REGNUM) - (match_dup 1)))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffrs\t%0.b, %1/z" - ) - -@@ -1197,7 +1197,7 @@ - UNSPEC_PTEST)) - (set (match_operand:VNx16BI 0 "register_operand" "=Upa") - (reg:VNx16BI FFRT_REGNUM))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "rdffrs\t%0.b, %1/z" - ) - -@@ -1327,7 +1327,7 @@ - (match_operand:SVE_FULL 1 "aarch64_sve_ldf1_operand" "Ut") - (reg:VNx16BI FFRT_REGNUM)] - SVE_LDFF1_LDNF1))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "ldf1\t%0., %2/z, %1" - ) - -@@ -1361,7 +1361,9 @@ - (reg:VNx16BI FFRT_REGNUM)] - SVE_LDFF1_LDNF1))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ & ) == 0" - "ldf1\t%0., %2/z, %1" - "&& !CONSTANT_P (operands[3])" - { -@@ -1409,7 +1411,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - operands[5] = aarch64_ptrue_reg (mode); - } -@@ -1427,7 +1429,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ld1\t%0.s, %5/z, [%2.s] - ld1\t%0.s, %5/z, [%2.s, #%1] -@@ -1449,7 +1451,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ld1\t%0.d, %5/z, [%2.d] - ld1\t%0.d, %5/z, 
[%2.d, #%1] -@@ -1472,7 +1474,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, xtw] - ld1\t%0.d, %5/z, [%1, %2.d, xtw %p4]" -@@ -1499,7 +1501,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, sxtw] - ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -@@ -1523,7 +1525,7 @@ - (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, uxtw] - ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -@@ -1557,7 +1559,9 @@ - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ & ) == 0" - "@ - ld1\t%0.s, %5/z, [%2.s] - ld1\t%0.s, %5/z, [%2.s, #%1] -@@ -1587,7 +1591,9 @@ - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ & ) == 0" - "@ - ld1\t%0.d, %5/z, [%2.d] - ld1\t%0.d, %5/z, [%2.d, #%1] -@@ -1618,7 +1624,9 @@ - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ & ) == 0" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, xtw] - ld1\t%0.d, %5/z, [%1, %2.d, xtw %p4]" -@@ -1650,7 +1658,9 @@ - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ & ) == 0" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, sxtw] - ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -@@ -1679,7 +1689,9 @@ - (mem:BLK (scratch))] - UNSPEC_LD1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE && (~ & ) == 0" -+ "TARGET_SVE -+ && TARGET_NON_STREAMING -+ && (~ 
& ) == 0" - "@ - ld1\t%0.d, %5/z, [%1, %2.d, uxtw] - ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -@@ -1710,7 +1722,7 @@ - (mem:BLK (scratch)) - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1w\t%0.s, %5/z, [%2.s] - ldff1w\t%0.s, %5/z, [%2.s, #%1] -@@ -1733,7 +1745,7 @@ - (mem:BLK (scratch)) - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1d\t%0.d, %5/z, [%2.d] - ldff1d\t%0.d, %5/z, [%2.d, #%1] -@@ -1758,7 +1770,7 @@ - (mem:BLK (scratch)) - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] - ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -@@ -1782,7 +1794,7 @@ - (mem:BLK (scratch)) - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] - ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -@@ -1817,7 +1829,7 @@ - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1\t%0.s, %5/z, [%2.s] - ldff1\t%0.s, %5/z, [%2.s, #%1] -@@ -1848,7 +1860,7 @@ - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1\t%0.d, %5/z, [%2.d] - ldff1\t%0.d, %5/z, [%2.d, #%1] -@@ -1881,7 +1893,7 @@ - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1\t%0.d, %5/z, [%1, %2.d, sxtw] - ldff1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -@@ -1910,7 +1922,7 @@ - (reg:VNx16BI FFRT_REGNUM)] - UNSPEC_LDFF1_GATHER))] - UNSPEC_PRED_X))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - ldff1\t%0.d, %5/z, [%1, %2.d, uxtw] - ldff1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -@@ -1985,7 +1997,7 @@ - UNSPEC_SVE_PREFETCH_GATHER) - 
(match_operand:DI 7 "const_int_operand") - (match_operand:DI 8 "const_int_operand"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - static const char *const insns[][2] = { - "prf", "%0, [%2.s]", -@@ -2014,7 +2026,7 @@ - UNSPEC_SVE_PREFETCH_GATHER) - (match_operand:DI 7 "const_int_operand") - (match_operand:DI 8 "const_int_operand"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - static const char *const insns[][2] = { - "prf", "%0, [%2.d]", -@@ -2045,7 +2057,7 @@ - UNSPEC_SVE_PREFETCH_GATHER) - (match_operand:DI 7 "const_int_operand") - (match_operand:DI 8 "const_int_operand"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - static const char *const insns[][2] = { - "prfb", "%0, [%1, %2.d, sxtw]", -@@ -2075,7 +2087,7 @@ - UNSPEC_SVE_PREFETCH_GATHER) - (match_operand:DI 7 "const_int_operand") - (match_operand:DI 8 "const_int_operand"))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - static const char *const insns[][2] = { - "prfb", "%0, [%1, %2.d, uxtw]", -@@ -2242,7 +2254,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_") - (match_operand:SVE_24 4 "register_operand")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - operands[5] = aarch64_ptrue_reg (mode); - } -@@ -2260,7 +2272,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") - (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.s, %5, [%1.s] - st1\t%4.s, %5, [%1.s, #%0] -@@ -2282,7 +2294,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") - (match_operand:SVE_2 4 "register_operand" "w, w, w, w")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%1.d] - st1\t%4.d, %5, [%1.d, #%0] -@@ -2305,7 +2317,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") - (match_operand:SVE_2 4 
"register_operand" "w, w")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%0, %1.d, xtw] - st1\t%4.d, %5, [%0, %1.d, xtw %p3]" -@@ -2332,7 +2344,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") - (match_operand:SVE_2 4 "register_operand" "w, w")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%0, %1.d, sxtw] - st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" -@@ -2356,7 +2368,7 @@ - (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") - (match_operand:SVE_2 4 "register_operand" "w, w")] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%0, %1.d, uxtw] - st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" -@@ -2384,7 +2396,7 @@ - (truncate:VNx4_NARROW - (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.s, %5, [%1.s] - st1\t%4.s, %5, [%1.s, #%0] -@@ -2407,7 +2419,7 @@ - (truncate:VNx2_NARROW - (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%1.d] - st1\t%4.d, %5, [%1.d, #%0] -@@ -2432,7 +2444,7 @@ - (truncate:VNx2_NARROW - (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%0, %1.d, sxtw] - st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" -@@ -2456,7 +2468,7 @@ - (truncate:VNx2_NARROW - (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] - UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - st1\t%4.d, %5, [%0, %1.d, uxtw] - st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" -@@ -2602,7 +2614,7 @@ - (match_operand:OI 1 "aarch64_sve_ld1ro_operand_" - "UO")] - UNSPEC_LD1RO))] -- "TARGET_SVE_F64MM" -+ "TARGET_SVE_F64MM && TARGET_NON_STREAMING" - { - operands[1] = 
gen_rtx_MEM (mode, XEXP (operands[1], 0)); - return "ld1ro\t%0., %2/z, %1"; -@@ -3834,7 +3846,7 @@ - [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") - (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] - UNSPEC_ADR))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0., [%1., %2.]" - ) - -@@ -3850,7 +3862,7 @@ - (match_operand:VNx2DI 2 "register_operand" "w")))] - UNSPEC_PRED_X)] - UNSPEC_ADR))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0.d, [%1.d, %2.d, sxtw]" - "&& !CONSTANT_P (operands[3])" - { -@@ -3867,7 +3879,7 @@ - (match_operand:VNx2DI 2 "register_operand" "w") - (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] - UNSPEC_ADR))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0.d, [%1.d, %2.d, uxtw]" - ) - -@@ -3879,7 +3891,7 @@ - (match_operand:VNx2DI 2 "register_operand" "w") - (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) - (match_operand:VNx2DI 1 "register_operand" "w")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0.d, [%1.d, %2.d, uxtw]" - ) - -@@ -3894,7 +3906,7 @@ - (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] - UNSPEC_PRED_X) - (match_operand:SVE_FULL_SDI 1 "register_operand")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - operands[4] = CONSTM1_RTX (mode); - } -@@ -3910,7 +3922,7 @@ - (match_operand:SVE_24I 3 "const_1_to_3_operand"))] - UNSPEC_PRED_X) - (match_operand:SVE_24I 1 "register_operand" "w")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0., [%1., %2., lsl %3]" - "&& !CONSTANT_P (operands[4])" - { -@@ -3934,7 +3946,7 @@ - (match_operand:VNx2DI 3 "const_1_to_3_operand"))] - UNSPEC_PRED_X) - (match_operand:VNx2DI 1 "register_operand" "w")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0.d, [%1.d, %2.d, sxtw %3]" - "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" - { -@@ -3955,7 +3967,7 @@ - (match_operand:VNx2DI 3 "const_1_to_3_operand"))] - UNSPEC_PRED_X) 
- (match_operand:VNx2DI 1 "register_operand" "w")))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "adr\t%0.d, [%1.d, %2.d, uxtw %3]" - "&& !CONSTANT_P (operands[5])" - { -@@ -6967,7 +6979,7 @@ - (match_operand: 3 "register_operand" "w, w")] - MATMUL) - (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] -- "TARGET_SVE_I8MM" -+ "TARGET_SVE_I8MM && TARGET_NON_STREAMING" - "@ - mmla\\t%0.s, %2.b, %3.b - movprfx\t%0, %1\;mmla\\t%0.s, %2.b, %3.b" -@@ -7538,7 +7550,7 @@ - (match_operand:SVE_MATMULF 3 "register_operand" "w, w") - (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] - FMMLA))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - \\t%0., %2., %3. - movprfx\t%0, %1\;\\t%0., %2., %3." -@@ -8601,7 +8613,7 @@ - (match_operand: 1 "register_operand") - (match_operand:SVE_FULL_F 2 "register_operand")] - UNSPEC_FADDA))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - { - operands[3] = aarch64_ptrue_reg (mode); - } -@@ -8614,7 +8626,7 @@ - (match_operand: 1 "register_operand" "0") - (match_operand:SVE_FULL_F 2 "register_operand" "w")] - UNSPEC_FADDA))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "fadda\t%0, %3, %0, %2." - ) - -@@ -8668,7 +8680,7 @@ - [(match_operand: 1 "register_operand" "Upl") - (match_operand:SVE_FULL_SD 2 "register_operand" "w")] - UNSPEC_SVE_COMPACT))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "compact\t%0., %1, %2." - ) - -diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md -index f138f4be4..36555f65c 100644 ---- a/gcc/config/aarch64/aarch64-sve2.md -+++ b/gcc/config/aarch64/aarch64-sve2.md -@@ -109,7 +109,7 @@ - (match_operand: 3 "register_operand" "w, w") - (mem:BLK (scratch))] - UNSPEC_LDNT1_GATHER))] -- "TARGET_SVE2" -+ "TARGET_SVE2 && TARGET_NON_STREAMING" - "@ - ldnt1\t%0., %1/z, [%3.] 
- ldnt1\t%0., %1/z, [%3., %2]" -@@ -129,6 +129,7 @@ - UNSPEC_LDNT1_GATHER))] - UNSPEC_PRED_X))] - "TARGET_SVE2 -+ && TARGET_NON_STREAMING - && (~ & ) == 0" - "@ - ldnt1\t%0., %1/z, [%3.] -@@ -159,7 +160,7 @@ - (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")] - - UNSPEC_STNT1_SCATTER))] -- "TARGET_SVE" -+ "TARGET_SVE && TARGET_NON_STREAMING" - "@ - stnt1\t%3., %0, [%2.] - stnt1\t%3., %0, [%2., %1]" -@@ -176,6 +177,7 @@ - (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))] - UNSPEC_STNT1_SCATTER))] - "TARGET_SVE2 -+ && TARGET_NON_STREAMING - && (~ & ) == 0" - "@ - stnt1\t%3., %0, [%2.] -@@ -2426,7 +2428,7 @@ - (match_operand:SVE_FULL_SDI 2 "register_operand" "w") - (match_operand:SVE_FULL_SDI 3 "register_operand" "w")] - UNSPEC_HISTCNT))] -- "TARGET_SVE2" -+ "TARGET_SVE2 && TARGET_NON_STREAMING" - "histcnt\t%0., %1/z, %2., %3." - ) - -@@ -2436,7 +2438,7 @@ - [(match_operand:VNx16QI_ONLY 1 "register_operand" "w") - (match_operand:VNx16QI_ONLY 2 "register_operand" "w")] - UNSPEC_HISTSEG))] -- "TARGET_SVE2" -+ "TARGET_SVE2 && TARGET_NON_STREAMING" - "histseg\t%0., %1., %2." - ) - -@@ -2460,7 +2462,7 @@ - SVE2_MATCH)] - UNSPEC_PRED_Z)) - (clobber (reg:CC_NZC CC_REGNUM))] -- "TARGET_SVE2" -+ "TARGET_SVE2 && TARGET_NON_STREAMING" - "\t%0., %1/z, %3., %4." - ) - -@@ -2491,6 +2493,7 @@ - SVE2_MATCH)] - UNSPEC_PRED_Z))] - "TARGET_SVE2 -+ && TARGET_NON_STREAMING - && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" - "\t%0., %1/z, %2., %3." - "&& !rtx_equal_p (operands[4], operands[6])" -@@ -2518,6 +2521,7 @@ - UNSPEC_PTEST)) - (clobber (match_scratch: 0 "=Upa"))] - "TARGET_SVE2 -+ && TARGET_NON_STREAMING - && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" - "\t%0., %1/z, %2., %3." 
- "&& !rtx_equal_p (operands[4], operands[6])" -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index a3c83a3b1..8f0ac2cde 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -252,6 +252,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS) - #define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64) - -+/* The current function is a normal non-streaming function. */ -+#define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) -+ - /* Crypto is an optional extension to AdvSIMD. */ - #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) - -@@ -290,16 +293,16 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define TARGET_SVE2 (AARCH64_ISA_SVE2) - - /* SVE2 AES instructions, enabled through +sve2-aes. */ --#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES) -+#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES && TARGET_NON_STREAMING) - - /* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */ --#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM) -+#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM && TARGET_NON_STREAMING) - - /* SVE2 SHA3 instructions, enabled through +sve2-sha3. */ --#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3) -+#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3 && TARGET_NON_STREAMING) - - /* SVE2 SM4 instructions, enabled through +sve2-sm4. */ --#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4) -+#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4 && TARGET_NON_STREAMING) - - /* SME instructions, enabled through +sme. Note that this does not - imply anything about the state of PSTATE.SM. 
*/ -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index 8dd2035bc..226dea48a 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -2706,7 +2706,7 @@ - - (define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE]) - --(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA]) -+(define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")]) - - (define_int_iterator SVE_INT_SHIFT_IMM [UNSPEC_ASRD - (UNSPEC_SQSHLU "TARGET_SVE2") -@@ -2720,7 +2720,7 @@ - (define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT - UNSPEC_BFMLALB - UNSPEC_BFMLALT -- UNSPEC_BFMMLA]) -+ (UNSPEC_BFMMLA "TARGET_NON_STREAMING")]) - - (define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT - UNSPEC_BFMLALB -diff --git a/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp -new file mode 100644 -index 000000000..d6a5a561a ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/aarch64-ssve.exp -@@ -0,0 +1,308 @@ -+# Specific regression driver for AArch64 SME. -+# Copyright (C) 2009-2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# Test whether certain SVE instructions are accepted or rejected in -+# SME streaming mode. -+ -+# Exit immediately if this isn't an AArch64 target. 
-+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+load_lib gcc-defs.exp -+ -+gcc_parallel_test_enable 0 -+ -+# Code shared by all tests. -+set preamble { -+#include -+ -+#pragma GCC target "+i8mm+f32mm+f64mm+sve2+sve2-bitperm+sve2-sm4+sve2-aes+sve2-sha3+sme" -+ -+extern svbool_t &pred; -+ -+extern svint8_t &s8; -+extern svint32_t &s32; -+ -+extern svuint8_t &u8; -+extern svuint16_t &u16; -+extern svuint32_t &u32; -+extern svuint64_t &u64; -+ -+extern svbfloat16_t &bf16; -+extern svfloat32_t &f32; -+ -+extern void *void_ptr; -+ -+extern int8_t *s8_ptr; -+extern int16_t *s16_ptr; -+extern int32_t *s32_ptr; -+ -+extern uint8_t *u8_ptr; -+extern uint16_t *u16_ptr; -+extern uint32_t *u32_ptr; -+extern uint64_t *u64_ptr; -+ -+extern uint64_t indx; -+} -+ -+# Wrap a standalone call in a streaming-compatible function. -+set sc_harness { -+void -+foo () [[arm::streaming_compatible]] -+{ -+ $CALL; -+} -+} -+ -+# HARNESS is some source code that should be appended to the preamble -+# variable defined above. It includes the string "$CALL", which should be -+# replaced by the function call in CALL. The result after both steps is -+# a complete C++ translation unit. -+# -+# Try compiling the C++ code and see what output GCC produces. -+# The expected output is either: -+# -+# - empty, if SHOULD_PASS is true -+# - a message rejecting CALL in streaming mode, if SHOULD_PASS is false -+# -+# CALL is simple enough that it can be used in test names. 
-+proc check_ssve_call { harness name call should_pass } { -+ global preamble -+ -+ set filename test-[pid] -+ set fd [open $filename.cc w] -+ puts $fd $preamble -+ puts -nonewline $fd [string map [list {$CALL} $call] $harness] -+ close $fd -+ remote_download host $filename.cc -+ -+ set test "streaming SVE call $name" -+ -+ set gcc_output [g++_target_compile $filename.cc $filename.s assembly ""] -+ remote_file build delete $filename.cc $filename.s -+ -+ if { [string equal $gcc_output ""] } { -+ if { $should_pass } { -+ pass $test -+ } else { -+ fail $test -+ } -+ return -+ } -+ -+ set lines [split $gcc_output "\n"] -+ set error_text "cannot be called when SME streaming mode is enabled" -+ if { [llength $lines] == 3 -+ && [string first "In function" [lindex $lines 0]] >= 0 -+ && [string first $error_text [lindex $lines 1]] >= 0 -+ && [string equal [lindex $lines 2] ""] } { -+ if { $should_pass } { -+ fail $test -+ } else { -+ pass $test -+ } -+ return -+ } -+ -+ verbose -log "$test: unexpected output" -+ fail $test -+} -+ -+# Apply check_ssve_call to each line in CALLS. The other arguments are -+# as for check_ssve_call. -+proc check_ssve_calls { harness calls should_pass } { -+ foreach line [split $calls "\n"] { -+ set call [string trim $line] -+ if { [string equal $call ""] } { -+ continue -+ } -+ check_ssve_call $harness "$call" $call $should_pass -+ } -+} -+ -+# A small selection of things that are valid in streaming mode. -+set streaming_ok { -+ s8 = svadd_x (pred, s8, s8) -+ s8 = svld1 (pred, s8_ptr) -+} -+ -+# This order follows the list in the SME manual. 
-+set nonstreaming_only { -+ u32 = svadrb_offset (u32, u32) -+ u64 = svadrb_offset (u64, u64) -+ u32 = svadrh_index (u32, u32) -+ u64 = svadrh_index (u64, u64) -+ u32 = svadrw_index (u32, u32) -+ u64 = svadrw_index (u64, u64) -+ u32 = svadrd_index (u32, u32) -+ u64 = svadrd_index (u64, u64) -+ u8 = svaesd (u8, u8) -+ u8 = svaese (u8, u8) -+ u8 = svaesimc (u8) -+ u8 = svaesmc (u8) -+ u8 = svbdep (u8, u8) -+ u8 = svbext (u8, u8) -+ f32 = svbfmmla (f32, bf16, bf16) -+ u8 = svbgrp (u8, u8) -+ u32 = svcompact (pred, u32) -+ f32 = svadda (pred, 1.0f, f32) -+ f32 = svexpa (u32) -+ f32 = svmmla (f32, f32, f32) -+ f32 = svtmad (f32, f32, 0) -+ f32 = svtsmul (f32, u32) -+ f32 = svtssel (f32, u32) -+ u32 = svhistcnt_z (pred, u32, u32) -+ u8 = svhistseg (u8, u8) -+ u32 = svld1ub_gather_offset_u32 (pred, u8_ptr, u32) -+ u32 = svld1ub_gather_offset_u32 (pred, u32, 1) -+ u64 = svld1_gather_index (pred, u64_ptr, u64) -+ u64 = svld1_gather_index_u64 (pred, u64, 1) -+ u32 = svld1uh_gather_index_u32 (pred, u16_ptr, u32) -+ u32 = svld1uh_gather_index_u32 (pred, u32, 1) -+ u8 = svld1ro (pred, u8_ptr + indx) -+ u8 = svld1ro (pred, u8_ptr + 1) -+ u16 = svld1ro (pred, u16_ptr + indx) -+ u16 = svld1ro (pred, u16_ptr + 1) -+ u32 = svld1ro (pred, u32_ptr + indx) -+ u32 = svld1ro (pred, u32_ptr + 1) -+ u64 = svld1ro (pred, u64_ptr + indx) -+ u64 = svld1ro (pred, u64_ptr + 1) -+ u32 = svld1sb_gather_offset_u32 (pred, s8_ptr, u32) -+ u32 = svld1sb_gather_offset_u32 (pred, u32, 1) -+ u32 = svld1sh_gather_index_u32 (pred, s16_ptr, u32) -+ u32 = svld1sh_gather_index_u32 (pred, u32, 1) -+ u64 = svld1sw_gather_index_u64 (pred, s32_ptr, u64) -+ u64 = svld1sw_gather_index_u64 (pred, u64, 1) -+ u64 = svld1uw_gather_index_u64 (pred, u32_ptr, u64) -+ u64 = svld1uw_gather_index_u64 (pred, u64, 1) -+ u32 = svld1_gather_index (pred, u32_ptr, u32) -+ u32 = svld1_gather_index_u32 (pred, u32, 1) -+ u8 = svldff1(pred, u8_ptr) -+ u16 = svldff1ub_u16(pred, u8_ptr) -+ u32 = svldff1ub_u32(pred, u8_ptr) -+ u64 = 
svldff1ub_u64(pred, u8_ptr) -+ u32 = svldff1ub_gather_offset_u32 (pred, u8_ptr, u32) -+ u32 = svldff1ub_gather_offset_u32 (pred, u32, 1) -+ u64 = svldff1(pred, u64_ptr) -+ u64 = svldff1_gather_index (pred, u64_ptr, u64) -+ u64 = svldff1_gather_index_u64 (pred, u64, 1) -+ u16 = svldff1(pred, u16_ptr) -+ u32 = svldff1uh_u32(pred, u16_ptr) -+ u64 = svldff1uh_u64(pred, u16_ptr) -+ u32 = svldff1uh_gather_offset_u32 (pred, u16_ptr, u32) -+ u32 = svldff1uh_gather_offset_u32 (pred, u32, 1) -+ u16 = svldff1sb_u16(pred, s8_ptr) -+ u32 = svldff1sb_u32(pred, s8_ptr) -+ u64 = svldff1sb_u64(pred, s8_ptr) -+ u32 = svldff1sb_gather_offset_u32 (pred, s8_ptr, u32) -+ u32 = svldff1sb_gather_offset_u32 (pred, u32, 1) -+ u32 = svldff1sh_u32(pred, s16_ptr) -+ u64 = svldff1sh_u64(pred, s16_ptr) -+ u32 = svldff1sh_gather_offset_u32 (pred, s16_ptr, u32) -+ u32 = svldff1sh_gather_offset_u32 (pred, u32, 1) -+ u64 = svldff1sw_u64(pred, s32_ptr) -+ u64 = svldff1sw_gather_offset_u64 (pred, s32_ptr, u64) -+ u64 = svldff1sw_gather_offset_u64 (pred, u64, 1) -+ u32 = svldff1(pred, u32_ptr) -+ u32 = svldff1_gather_index (pred, u32_ptr, u32) -+ u32 = svldff1_gather_index_u32 (pred, u32, 1) -+ u64 = svldff1uw_u64(pred, u32_ptr) -+ u64 = svldff1uw_gather_offset_u64 (pred, u32_ptr, u64) -+ u64 = svldff1uw_gather_offset_u64 (pred, u64, 1) -+ u8 = svldnf1(pred, u8_ptr) -+ u16 = svldnf1ub_u16(pred, u8_ptr) -+ u32 = svldnf1ub_u32(pred, u8_ptr) -+ u64 = svldnf1ub_u64(pred, u8_ptr) -+ u64 = svldnf1(pred, u64_ptr) -+ u16 = svldnf1(pred, u16_ptr) -+ u32 = svldnf1uh_u32(pred, u16_ptr) -+ u64 = svldnf1uh_u64(pred, u16_ptr) -+ u16 = svldnf1sb_u16(pred, s8_ptr) -+ u32 = svldnf1sb_u32(pred, s8_ptr) -+ u64 = svldnf1sb_u64(pred, s8_ptr) -+ u32 = svldnf1sh_u32(pred, s16_ptr) -+ u64 = svldnf1sh_u64(pred, s16_ptr) -+ u64 = svldnf1sw_u64(pred, s32_ptr) -+ u32 = svldnf1(pred, u32_ptr) -+ u64 = svldnf1uw_u64(pred, u32_ptr) -+ u32 = svldnt1ub_gather_offset_u32 (pred, u8_ptr, u32) -+ u32 = svldnt1ub_gather_offset_u32 (pred, 
u32, 1) -+ u64 = svldnt1_gather_index (pred, u64_ptr, u64) -+ u64 = svldnt1_gather_index_u64 (pred, u64, 1) -+ u32 = svldnt1uh_gather_offset_u32 (pred, u16_ptr, u32) -+ u32 = svldnt1uh_gather_offset_u32 (pred, u32, 1) -+ u32 = svldnt1sb_gather_offset_u32 (pred, s8_ptr, u32) -+ u32 = svldnt1sb_gather_offset_u32 (pred, u32, 1) -+ u32 = svldnt1sh_gather_offset_u32 (pred, s16_ptr, u32) -+ u32 = svldnt1sh_gather_offset_u32 (pred, u32, 1) -+ u64 = svldnt1sw_gather_offset_u64 (pred, s32_ptr, u64) -+ u64 = svldnt1sw_gather_offset_u64 (pred, u64, 1) -+ u64 = svldnt1uw_gather_offset_u64 (pred, u32_ptr, u64) -+ u64 = svldnt1uw_gather_offset_u64 (pred, u64, 1) -+ u32 = svldnt1_gather_offset (pred, u32_ptr, u32) -+ u32 = svldnt1_gather_offset_u32 (pred, u32, 1) -+ pred = svmatch (pred, u8, u8) -+ pred = svnmatch (pred, u8, u8) -+ u64 = svpmullb_pair (u64, u64) -+ u64 = svpmullt_pair (u64, u64) -+ svprfb_gather_offset (pred, void_ptr, u64, SV_PLDL1KEEP) -+ svprfb_gather_offset (pred, u64, 1, SV_PLDL1KEEP) -+ svprfd_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) -+ svprfd_gather_index (pred, u64, 1, SV_PLDL1KEEP) -+ svprfh_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) -+ svprfh_gather_index (pred, u64, 1, SV_PLDL1KEEP) -+ svprfw_gather_index (pred, void_ptr, u64, SV_PLDL1KEEP) -+ svprfw_gather_index (pred, u64, 1, SV_PLDL1KEEP) -+ u64 = svrax1 (u64, u64) -+ pred = svrdffr () -+ pred = svrdffr_z (pred) -+ svsetffr () -+ u32 = svsm4e (u32, u32) -+ u32 = svsm4ekey (u32, u32) -+ s32 = svmmla (s32, s8, s8) -+ svst1b_scatter_offset (pred, u8_ptr, u32, u32) -+ svst1b_scatter_offset (pred, u32, 1, u32) -+ svst1_scatter_index (pred, u64_ptr, u64, u64) -+ svst1_scatter_index (pred, u64, 1, u64) -+ svst1h_scatter_index (pred, u16_ptr, u32, u32) -+ svst1h_scatter_index (pred, u32, 1, u32) -+ svst1w_scatter_index (pred, u32_ptr, u64, u64) -+ svst1w_scatter_index (pred, u64, 1, u64) -+ svst1_scatter_index (pred, u32_ptr, u32, u32) -+ svst1_scatter_index (pred, u32, 1, u32) -+ 
svstnt1b_scatter_offset (pred, u8_ptr, u32, u32) -+ svstnt1b_scatter_offset (pred, u32, 1, u32) -+ svstnt1_scatter_offset (pred, u64_ptr, u64, u64) -+ svstnt1_scatter_offset (pred, u64, 1, u64) -+ svstnt1h_scatter_offset (pred, u16_ptr, u32, u32) -+ svstnt1h_scatter_offset (pred, u32, 1, u32) -+ svstnt1w_scatter_offset (pred, u32_ptr, u64, u64) -+ svstnt1w_scatter_offset (pred, u64, 1, u64) -+ svstnt1_scatter_offset (pred, u32_ptr, u32, u32) -+ svstnt1_scatter_offset (pred, u32, 1, u32) -+ u32 = svmmla (u32, u8, u8) -+ s32 = svusmmla (s32, u8, s8) -+ svwrffr (pred) -+} -+ -+check_ssve_calls $sc_harness $streaming_ok 1 -+check_ssve_calls $sc_harness $nonstreaming_only 0 -+ -+gcc_parallel_test_enable 1 -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -index 38140413a..45270be60 100644 ---- a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -@@ -50,6 +50,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } { - torture-init - set-torture-options { - "-std=c++98 -O0 -g" -+ "-std=c++11 -O0 -DSTREAMING_COMPATIBLE" - "-std=c++98 -O1 -g" - "-std=c++11 -O2 -g" - "-std=c++14 -O3 -g" -diff --git a/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -index 78e8ecae7..0a7151220 100644 ---- a/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -+++ b/gcc/testsuite/g++.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -@@ -53,6 +53,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } { - torture-init - set-torture-options { - "-std=c++98 -O0 -g" -+ "-std=c++11 -O0 -DSTREAMING_COMPATIBLE" - "-std=c++98 -O1 -g" - "-std=c++11 -O2 -g" - "-std=c++14 -O3 -g" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -index a271f1793..8cb2b9bb4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -@@ -50,6 +50,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } { - torture-init - set-torture-options { - "-std=c90 -O0 -g" -+ "-std=c90 -O0 -DSTREAMING_COMPATIBLE" - "-std=c90 -O1 -g" - "-std=c99 -O2 -g" - "-std=c11 -O3 -g" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c -index 6c6bfa1c2..4d6ec2d65 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c -index 8b2a1dd1c..04afbcee6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c -index 90a56420a..8b4c7d1ff 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c -index a61eec971..5dcdc54b0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c -index 970485bd6..d9d16ce3f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c -index d06f51fe3..a358c2403 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c -index b23f25a11..bd1e9af0a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c -index 
b1d98fbf5..4bb2912a4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ - /* { dg-require-effective-target aarch64_asm_bf16_ok } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c -index 2e80d6830..d261ec00b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c -index e0bc33efe..024b0510f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c -index e4634982b..0b32dfb60 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c -index 71cb97b8a..38688dbca 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c -index 954329a0b..a3e89cc97 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c -index ec664845f..602ab048c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c -index 5a5411e46..87c26e6ea 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c -index 4ded1c575..5e9839537 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c -index c31f9ccb5..b117df2a4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c -index 00b68ff29..8b972f61b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c -index 47127960c..413d4d62d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c -index 9b6335547..b3df7d154 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c -index c9cea3ad8..0da1e5296 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c -index 2cccc8d49..a3304c419 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c -index 6ee1d48ab..73ef94805 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c -index cb1801778..fe909b666 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -index 86081edbd..30ba30639 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c -index c8df00f8a..cf62fada9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c -index 2fb9d5b74..b9fde4dac 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -index 3cd211b16..35b7dd1d2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c -index 44b16ed5f..57b6a6567 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c -index 3aa9a15ee..bd7e28478 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c -index 49aff5146..143800003 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c -index 00bf9e129..145b0b7f3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c -index 9e9b3290a..9f150631b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c -index 64ec62871..8dd75d136 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c -index 22701320b..f15454586 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - /* { dg-additional-options "-march=armv8.6-a+f64mm" } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c -index 16a5316a9..06249ad4c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c -index 3f953247e..8d141e133 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c -index 424de65a6..77836cbf6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c -index aa375bea2..f4b24ab41 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c -index ed07b4dfc..1b9782368 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c -index 20ca42720..2009dec81 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c -index e3a85a23f..0e1d48966 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c -index 3a0094fba..115d7d3a9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c -index 4d076b486..5dc44421c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c -index ffa85eb3e..fac4ec41c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c -index a9c418265..f57df4226 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c -index 99af86ddf..0c069fa4f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c -index 77c7e0a2d..98102e013 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c -index b605f8b67..f86a34d12 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c -index 84fb5c335..139371878 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c -index 447001793..f0338aae6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c -index 09d3cc8c2..5810bc0ac 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c -index f3dcf03cd..52e95abb9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c -index f4e9d5db9..0889eefdd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c -index 854d19233..fb144d756 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c -index 80f646870..1f997480e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c -index 13ce863c9..60405d0a0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c -index 2fcc63390..225e9969d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c -index cc15b927a..366e36afd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c -index 7e330c042..b84b9bcdd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c -index d0e47f0bf..e779b0712 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c -index 66bf0f746..17e0f9aa2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c -index faf71bf9d..030f187b1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c -index 41c7dc9cf..fb8653016 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c -index 8b53ce94f..5be30a2d8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c -index 1d5fde0e6..61d242c07 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c -index 97a36e884..afe748ef9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c -index c018a4c1c..bee222855 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c -index cf620d1f4..ccaac2ca4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c -index 1fa819296..c8416f99d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c -index 5224ec40a..ec26a82ca 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c -index 18e87f2b8..e211f1794 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c -index 83883fca4..24dfe452f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c -index c2a676807..f7e3977bf 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c -index 2f2a04d24..7f2a829a8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c -index e3e83a205..685f62808 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c -index 769f2c266..49a7a8536 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c -index e0a748c6a..1d30c7ba6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c -index 86716da9b..c2b3f42cb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c -index e7a4aa6e9..585a6241e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c -index 69ba96d52..ebb2f0f66 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c -index e1a1873f0..f4ea96cf9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c -index 0a49cbcc0..e3735239c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c -index b633335dc..67e70361b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c -index 32a4309b6..5755c79bc 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c -index 73a9be892..a58489995 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c -index 94ea73b63..b18751209 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c -index 81b64e836..bffac9365 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c -index 453b3ff24..a4acb1e5e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c -index bbbed79dc..828288cd8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c -index 5430e256b..e3432c46c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c -index e5da8a83d..78aa34ec0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c -index 411428756..9dad1212c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c -index d795ace63..33b6c10dd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c -index 6caf2f504..e8c9c845f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c -index af0be08d2..b1c9c8135 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c -index 43124dd89..9ab776a21 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c -index 90c4e58a2..745740dfa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c -index 302623a40..3a7bd6a43 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c -index 88ad2d1dc..ade0704f7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c -index e8e06411f..5d3e0ce95 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c -index 21d02ddb7..08ae802ee 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c -index 904cb027e..d8dc5e157 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c -index a40012318..042ae5a9f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c -index a9a98a683..d0844fa51 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c -index d02e44342..12460105d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c -index 663a73d27..536331371 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c -index 5e0ef067f..602e6a686 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c -index 1cfae1b95..4b307b341 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c -index abb3d769a..db205b1ef 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c -index 6e330e8e8..0eac877eb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c -index 4eb5323e9..266ecf167 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c -index ebac26e7d..bdd725e4a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c -index 6c0daea52..ab2c79da7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c -index 0e400c679..361d7de05 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c -index ac9779899..8adcec3d5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c -index c7ab06171..781fc1a9c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c -index 947a896e7..93b4425ec 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c -index cf0178688..d47d748c7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c -index 83b73ec8e..e390d6857 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c -index 778096e82..97a0e39e7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c -index 592c8237d..21008d7f9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c -index 634092af8..8a3d795b3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c -index 4a03f6676..c0b57a2f3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c -index 162ee176a..6714152d9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c -index e920ac43b..3df404d77 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c -index 65e28c5c2..e899a4a6f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c -index 70d3f27d8..ab69656cf 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c -index 5c29f1d19..5d7b07497 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c -index e04b9a788..5b53c885d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c -index 0553fc98d..992eba7cc 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c -index 61a474fdf..99e0f8bd0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c -index be63d8bf9..fe23913f2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c -index 4f52490b4..6deb39770 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c -index 73f50d182..e76457da6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c -index 08c7dc6dd..e49a7f8ed 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c -index 6a41bc26b..00b40281c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c -index 2f7718730..41560af33 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c -index d7f1a68a4..0acf4b349 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c -index 5b483e4aa..578212898 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c -index 62121ce0a..8249c4c3f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c -index 8fe13411f..e59c451f7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c -index 50122e3b7..d788576e2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c -index d7cce11b6..b21fdb964 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c -index 7bf82c3b6..1ae41b002 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c -index e2fef064b..e3d8fb3b5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c -index 57c61e122..df9a0c07f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c -index ed9686c4e..c3467d846 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c -index a3107f562..bf3355e99 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c -index 93d5abaf7..bcc3eb3fd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c -index 32d36a84c..4c01c13ac 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c -index 373922791..3c6556591 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c -index b3c3be1d0..b222a0dc6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c -index f66dbf397..e1c7f47dc 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-require-effective-target aarch64_asm_f32mm_ok } */ - /* { dg-additional-options "-march=armv8.2-a+f32mm" } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c -index 49dc0607c..c45caa700 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-require-effective-target aarch64_asm_f64mm_ok } */ - /* { dg-additional-options "-march=armv8.2-a+f64mm" } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c -index e7ce009ac..dc155461c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-require-effective-target aarch64_asm_i8mm_ok } */ - /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c -index 81f5166fb..43d601a47 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-require-effective-target aarch64_asm_i8mm_ok } */ - /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c -index c4bfbbbf7..f32cfbfcb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c -index a84acb1a1..8a4293b62 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c -index 04b7a1575..6beca4b8e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c -index 2bbae1b9e..6af44ac82 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c -index 5564e967f..7e28ef641 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c -index cb6774ad0..1efd43445 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c -index fe978bbe5..f50c43e83 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c -index d244e701a..bb6fb10b8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c -index 5c4ebf440..19ec78e9e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c -index fe3f7259f..57fbb91b0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c -index 232123566..60018be5b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c -index d59033356..fb1bb29db 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c -index c7a35f1b4..65ee9a071 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c -index e098cb9b7..ceec61939 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c -index 058d1313f..aeedbc6d7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c -index 2a23d41f3..2d69d085b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c -index 6a1adb056..3e5733ef9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c -index 12197315d..5cd330a3d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c -index 7021ea68f..0ee9948cb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c -index 2363f592b..f18bedce1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c -index 767c009b4..6850865ec 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -index 2da61ff5c..d8916809b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -@@ -11,10 +11,17 @@ - #error "Please define -DTEST_OVERLOADS or -DTEST_FULL" - #endif - -+#ifdef STREAMING_COMPATIBLE -+#define ATTR __arm_streaming_compatible -+#else -+#define ATTR -+#endif -+ - #ifdef __cplusplus --#define PROTO(NAME, RET, ARGS) extern "C" RET NAME ARGS; RET NAME ARGS -+#define PROTO(NAME, RET, ARGS) \ -+ extern "C" RET NAME ARGS ATTR; RET NAME ARGS ATTR - #else --#define PROTO(NAME, RET, ARGS) RET NAME ARGS -+#define PROTO(NAME, RET, ARGS) RET NAME ARGS ATTR - #endif - - #define TEST_UNIFORM_Z(NAME, TYPE, CODE1, CODE2) \ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c -index 3a00716e3..c0b03a0d3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c -index b73d420fb..8eef8a12c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c -index fc31928a6..5c96c5579 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c -index 94bc696eb..9deed667f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c -index d0ec91882..749ea8664 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c -index 23e0da3f7..053abcb26 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c -index e7c3ea03b..3ab251fe0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c -index 022573a19..6c6471c5e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c -index ffcdf4224..9559e0f35 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c -index 9440f3fd9..a0dd7e334 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-require-effective-target aarch64_asm_i8mm_ok } */ - /* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -index e08cd6121..2fb27fb5e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/aarch64-sve2-acle-asm.exp -@@ -52,6 +52,7 @@ if { [info exists gcc_runtest_parallelize_limit_minor] } { - torture-init - set-torture-options { - "-std=c90 -O0 -g" -+ "-std=c90 -O0 -DSTREAMING_COMPATIBLE" - "-std=c90 -O1 -g" - "-std=c99 -O2 -g" - "-std=c11 -O3 -g" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c -index 622f5cf46..484f7251f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesd_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c -index 6555bbb1d..6869bbd05 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aese_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c -index 4630595ff..534ffe06f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesimc_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - 
#include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c -index 6e8acf48f..1660a8eaf 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/aesmc_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c -index 14230850f..c1a4e1061 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c -index 7f08df4ba..4f14cc4c4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c -index 7f7cbbeeb..091253ec6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c -index b420323b9..deb1ad27d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bdep_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c -index 50a647918..9efa501ef 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c -index 9f98b843c..18963da5b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c -index 9dbaec1b7..91591f93b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c 
b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c -index 81ed5a463..1211587ef 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bext_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c -index 70aeae3f3..72868bea7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c -index 6e19e38d8..c8923816f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c -index 27fa40f47..86989529f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c 
-index b667e03e3..5cd941a7a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/bgrp_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c -index 7bf783a7c..53d6c5c56 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c -index 001f5f0f1..c6d9862e3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c -index d93091adc..cb11a0026 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c -index 
3b8898023..0bb06cdb4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histcnt_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c -index 380ccdf85..ce3458e5e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c -index f43292f0c..7b1eff811 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/histseg_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c -index 102810e25..17e3673a4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c -index a0ed71227..8ce32e9f9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c -index 94c64971c..b7e1d7a99 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c -index a0aa6703f..b0789ad21 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c -index e1479684e..df09eaa76 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c -index 77cdcfeba..5f185ea82 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c -index bb729483f..71fece575 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c -index de5b69314..1183e72f0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c -index d01ec18e4..4d5e6e771 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c -index b96e94353..ed329a23f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sb_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c -index 1dcfbc0fb..6dbd6cea0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c -index 4166ed0a6..4ea3335a2 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c -index 7680344da..d55451519 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c -index 2427c83ab..18c8ca44e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c -index 2f538e847..41bff31d0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c -index ace1c2f2f..30b8f6948 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1sw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c -index d3b29eb19..8750d11af 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c -index 3bc406620..f7981991a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c -index 0af4b40b8..4d5ee4ef4 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c -index fe28d78ed..005c29c06 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1ub_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c -index 985432615..92613b166 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c -index 3c5baeee6..be2e6d126 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c -index 4d945e9f9..4d122059f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c -index 680238ac4..e3bc1044c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uh_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c -index 787ae9def..9efa4b2cb 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c -index 4810bc3c4..4ded4454d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/ldnt1uw_gather_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c -index baebc7693..d0ce81294 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c -index f35a75379..03473906a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c -index 0bdf4462f..2a8b4d250 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c -index 6d78692bd..8409276d9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/match_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c -index 935b19a10..044ba1de3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c -index 8a00b30f3..6c2d890fa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_s8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c -index 868c20a11..863e31054 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u16.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c -index af6b58165..a62783db7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/nmatch_u8.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c -index 944609214..1fd85e0ce 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullb_pair_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c -index 90e2e991f..300d885ab 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/pmullt_pair_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c -index ea80d40db..9dbc71839 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies 
"**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c -index b237c7edd..5caa2a544 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/rax1_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c -index 0ff5746d8..14194eef6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4e_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c -index 58ad33c5d..e72384108 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/sm4ekey_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c -index 3f928e20e..75539f692 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c -index 8a35c76b9..c0d47d0c1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_f64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c -index bd6002682..80fb3e869 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c -index 0bfa2616e..edd2bc418 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c -index fbfa008c1..a6e5059de 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c -index c283135c4..067e5b109 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c -index bf6ba5973..498fe82e5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c -index a24d0c89c..614f5fb1a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c -index 2b05a7720..ce2c482af 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c -index a13c5f5bb..593dc1939 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1b_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c -index 4e012f61f..b9d06c1c5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c -index e934a708d..006e0e24d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c -index db21821eb..8cd7cb86a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u32.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c -index 53f930da1..972ee3689 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1h_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c -index ec6c837d9..368a17c47 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_s64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ - - #include "test_sve_acle.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c -index 3c5d96de4..57d60a350 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/stnt1w_scatter_u64.c -@@ -1,3 +1,4 @@ -+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */ - /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ - - #include "test_sve_acle.h" --- -2.33.0 - diff --git a/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch b/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch deleted file mode 100644 index 77f70f4..0000000 --- a/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch +++ /dev/null @@ -1,250 +0,0 @@ -From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Mon, 12 Dec 2022 15:18:56 +0000 -Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab. - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11 - -This implements the new tbranch optab for AArch64. - -we cannot emit one big RTL for the final instruction immediately. -The reason that all comparisons in the AArch64 backend expand to separate CC -compares, and separate testing of the operands is for ifcvt. - -The separate CC compare is needed so ifcvt can produce csel, cset etc from the -compares. Unlike say combine, ifcvt can not do recog on a parallel with a -clobber. Should we emit the instruction directly then ifcvt will not be able -to say, make a csel, because we have no patterns which handle zero_extract and -compare. (unlike combine ifcvt cannot transform the extract into an AND). - -While you could provide various patterns for this (and I did try) you end up -with broken patterns because you can't add the clobber to the CC register. If -you do, ifcvt recog fails. - -i.e. - -int -f1 (int x) -{ - if (x & 1) - return 1; - return x; -} - -We lose csel here. - -Secondly the reason the compare with an explicit CC mode is needed is so that -ifcvt can transform the operation into a version that doesn't require the flags -to be set. But it only does so if it know the explicit usage of the CC reg. - -For instance - -int -foo (int a, int b) -{ - return ((a & (1 << 25)) ? 
5 : 4); -} - -Doesn't require a comparison, the optimal form is: - -foo(int, int): - ubfx x0, x0, 25, 1 - add w0, w0, 4 - ret - -and no compare is actually needed. If you represent the instruction using an -ANDS instead of a zero_extract then you get close, but you end up with an ands -followed by an add, which is a slower operation. - -gcc/ChangeLog: - - * config/aarch64/aarch64.md (*tb1): Rename to... - (*tb1): ... this. - (tbranch_4): New. - * config/aarch64/iterators.md(ZEROM, zerom): New. - -gcc/testsuite/ChangeLog: - - * gcc.target/aarch64/tbz_1.c: New test. ---- - gcc/config/aarch64/aarch64.md | 33 ++++++-- - gcc/config/aarch64/iterators.md | 2 + - gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++ - 3 files changed, 122 insertions(+), 8 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 079c8a3f9..2becc888e 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -953,12 +953,29 @@ - (const_int 1)))] - ) - --(define_insn "*tb1" -+(define_expand "tbranch_3" - [(set (pc) (if_then_else -- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r") -- (const_int 1) -- (match_operand 1 -- "aarch64_simd_shift_imm_" "n")) -+ (EQL (match_operand:ALLI 0 "register_operand") -+ (match_operand 1 "aarch64_simd_shift_imm_")) -+ (label_ref (match_operand 2 "")) -+ (pc)))] -+ "" -+{ -+ rtx bitvalue = gen_reg_rtx (mode); -+ rtx reg = gen_lowpart (mode, operands[0]); -+ rtx val = GEN_INT (1UL << UINTVAL (operands[1])); -+ emit_insn (gen_and3 (bitvalue, reg, val)); -+ operands[1] = const0_rtx; -+ operands[0] = aarch64_gen_compare_reg (, bitvalue, -+ operands[1]); -+}) -+ -+(define_insn "*tb1" -+ [(set (pc) (if_then_else -+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r") -+ (const_int 1) -+ (match_operand 1 -+ "aarch64_simd_shift_imm_" "n")) - (const_int 0)) - (label_ref (match_operand 2 "" "")) 
- (pc))) -@@ -969,15 +986,15 @@ - { - if (get_attr_far_branch (insn) == 1) - return aarch64_gen_far_branch (operands, 2, "Ltb", -- "\\t%0, %1, "); -+ "\\t%0, %1, "); - else - { - operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1])); -- return "tst\t%0, %1\;\t%l2"; -+ return "tst\t%0, %1\;\t%l2"; - } - } - else -- return "\t%0, %1, %l2"; -+ return "\t%0, %1, %l2"; - } - [(set_attr "type" "branch") - (set (attr "length") -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index 226dea48a..b616f5c9a 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -1104,6 +1104,8 @@ - - ;; Give the number of bits in the mode - (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")]) -+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")]) -+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")]) - - ;; Give the ordinal of the MSB in the mode - (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63") -diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c -new file mode 100644 -index 000000000..39deb58e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c -@@ -0,0 +1,95 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */ -+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ -+ -+#include -+ -+void h(void); -+ -+/* -+** g1: -+** tbnz w[0-9]+, #?0, .L([0-9]+) -+** ret -+** ... -+*/ -+void g1(bool x) -+{ -+ if (__builtin_expect (x, 0)) -+ h (); -+} -+ -+/* -+** g2: -+** tbz w[0-9]+, #?0, .L([0-9]+) -+** b h -+** ... -+*/ -+void g2(bool x) -+{ -+ if (__builtin_expect (x, 1)) -+ h (); -+} -+ -+/* -+** g3_ge: -+** tbnz w[0-9]+, #?31, .L[0-9]+ -+** b h -+** ... -+*/ -+void g3_ge(int x) -+{ -+ if (__builtin_expect (x >= 0, 1)) -+ h (); -+} -+ -+/* -+** g3_gt: -+** cmp w[0-9]+, 0 -+** ble .L[0-9]+ -+** b h -+** ... 
-+*/ -+void g3_gt(int x) -+{ -+ if (__builtin_expect (x > 0, 1)) -+ h (); -+} -+ -+/* -+** g3_lt: -+** tbz w[0-9]+, #?31, .L[0-9]+ -+** b h -+** ... -+*/ -+void g3_lt(int x) -+{ -+ if (__builtin_expect (x < 0, 1)) -+ h (); -+} -+ -+/* -+** g3_le: -+** cmp w[0-9]+, 0 -+** bgt .L[0-9]+ -+** b h -+** ... -+*/ -+void g3_le(int x) -+{ -+ if (__builtin_expect (x <= 0, 1)) -+ h (); -+} -+ -+/* -+** g5: -+** mov w[0-9]+, 65279 -+** tst w[0-9]+, w[0-9]+ -+** beq .L[0-9]+ -+** b h -+** ... -+*/ -+void g5(int x) -+{ -+ if (__builtin_expect (x & 0xfeff, 1)) -+ h (); -+} --- -2.33.0 - diff --git a/0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch b/0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch deleted file mode 100644 index cc9f87c..0000000 --- a/0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 20 Jun 2023 21:48:38 +0100 -Subject: [PATCH 088/157] [Backport][SME] aarch64: Robustify stack tie handling - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb - -The SVE handling of stack clash protection copied the stack -pointer to X11 before the probe and set up X11 as the CFA -for unwind purposes: - - /* This is done to provide unwinding information for the stack - adjustments we're about to do, however to prevent the optimizers - from removing the R11 move and leaving the CFA note (which would be - very wrong) we tie the old and new stack pointer together. - The tie will expand to nothing but the optimizers will not touch - the instruction. */ - rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); - emit_move_insn (stack_ptr_copy, stack_pointer_rtx); - emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); - - /* We want the CFA independent of the stack pointer for the - duration of the loop. 
*/ - add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy); - RTX_FRAME_RELATED_P (insn) = 1; - --fcprop-registers is now smart enough to realise that X11 = SP, -replace X11 with SP in the stack tie, and delete the instruction -created above. - -This patch tries to prevent that by making stack_tie fussy about -the register numbers. It fixes failures in -gcc.target/aarch64/sve/pcs/stack_clash*.c. - -gcc/ - * config/aarch64/aarch64.md (stack_tie): Hard-code the first - register operand to the stack pointer. Require the second register - operand to have the number specified in a separate const_int operand. - * config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function. - (aarch64_allocate_and_probe_stack_space): Use it. - (aarch64_expand_prologue, aarch64_expand_epilogue): Likewise. - (aarch64_expand_epilogue): Likewise. ---- - gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++---- - gcc/config/aarch64/aarch64.md | 7 ++++--- - 2 files changed, 18 insertions(+), 7 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 2bb49b9b0..4d505c6fc 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void) - return STACK_CLASH_CALLER_GUARD; - } - -+/* Emit a stack tie that acts as a scheduling barrier for all previous and -+ subsequent memory accesses and that requires the stack pointer and REG -+ to have their current values. REG can be stack_pointer_rtx if no -+ other register's value needs to be fixed. */ -+ -+static void -+aarch64_emit_stack_tie (rtx reg) -+{ -+ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode))); -+} - - /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch - registers. If POLY_SIZE is not large enough to require a probe this function -@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - the instruction. 
*/ - rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); - emit_move_insn (stack_ptr_copy, stack_pointer_rtx); -- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx)); -+ aarch64_emit_stack_tie (stack_ptr_copy); - - /* We want the CFA independent of the stack pointer for the - duration of the loop. */ -@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void) - aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], - hard_frame_pointer_rtx, 0); - } -- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); -+ aarch64_emit_stack_tie (hard_frame_pointer_rtx); - } - - aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, -@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - || cfun->calls_alloca - || crtl->calls_eh_return) - { -- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -+ aarch64_emit_stack_tie (stack_pointer_rtx); - need_barrier_p = false; - } - -@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - callee_adjust != 0, &cfi_ops); - - if (need_barrier_p) -- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -+ aarch64_emit_stack_tie (stack_pointer_rtx); - - if (callee_adjust != 0) - aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops); -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 2becc888e..2ce123255 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -7088,10 +7088,11 @@ - - (define_insn "stack_tie" - [(set (mem:BLK (scratch)) -- (unspec:BLK [(match_operand:DI 0 "register_operand" "rk") -- (match_operand:DI 1 "register_operand" "rk")] -+ (unspec:BLK [(reg:DI SP_REGNUM) -+ (match_operand:DI 0 "register_operand" "rk") -+ (match_operand:DI 1 "const_int_operand")] - UNSPEC_PRLG_STK))] -- "" -+ "REGNO (operands[0]) == INTVAL (operands[1])" - "" - [(set_attr "length" "0")] - ) --- -2.33.0 - diff --git a/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch 
b/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch deleted file mode 100644 index 6348a49..0000000 --- a/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch +++ /dev/null @@ -1,201 +0,0 @@ -From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 17 Oct 2023 23:45:43 +0100 -Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e - -The prologue/epilogue pass allows the prologue sequence to contain -jumps. The sequence is then partitioned into basic blocks using -find_many_sub_basic_blocks. - -This patch treats epilogues in a similar way. Since only one block -might need to be split, the patch (re)introduces a find_sub_basic_blocks -routine to handle a single block. - -The new routine hard-codes the assumption that split_block will chain -the new block immediately after the original block. The routine doesn't -try to replicate the fix for PR81030, since that was specific to -gimple->rtl expansion. - -The patch is needed for follow-on aarch64 patches that add conditional -code to the epilogue. The tests are part of those patches. - -gcc/ - * cfgbuild.h (find_sub_basic_blocks): Declare. - * cfgbuild.cc (update_profile_for_new_sub_basic_block): New function, - split out from... - (find_many_sub_basic_blocks): ...here. - (find_sub_basic_blocks): New function. - * function.cc (thread_prologue_and_epilogue_insns): Handle - epilogues that contain jumps. 
---- - gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++---------------- - gcc/cfgbuild.h | 1 + - gcc/function.cc | 4 +++ - 3 files changed, 70 insertions(+), 30 deletions(-) - -diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc -index 646a06614..58b865f29 100644 ---- a/gcc/cfgbuild.cc -+++ b/gcc/cfgbuild.cc -@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b) - } - } - -+/* Update the profile information for BB, which was created by splitting -+ an RTL block that had a non-final jump. */ -+ -+static void -+update_profile_for_new_sub_basic_block (basic_block bb) -+{ -+ edge e; -+ edge_iterator ei; -+ -+ bool initialized_src = false, uninitialized_src = false; -+ bb->count = profile_count::zero (); -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ { -+ if (e->count ().initialized_p ()) -+ { -+ bb->count += e->count (); -+ initialized_src = true; -+ } -+ else -+ uninitialized_src = true; -+ } -+ /* When some edges are missing with read profile, this is -+ most likely because RTL expansion introduced loop. -+ When profile is guessed we may have BB that is reachable -+ from unlikely path as well as from normal path. -+ -+ TODO: We should handle loops created during BB expansion -+ correctly here. For now we assume all those loop to cycle -+ precisely once. */ -+ if (!initialized_src -+ || (uninitialized_src -+ && profile_status_for_fn (cfun) < PROFILE_GUESSED)) -+ bb->count = profile_count::uninitialized (); -+ -+ compute_outgoing_frequencies (bb); -+} -+ - /* Assume that some pass has inserted labels or control flow - instructions within a basic block. Split basic blocks as needed - and create edges. 
*/ -@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks) - if (profile_status_for_fn (cfun) != PROFILE_ABSENT) - FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb) - { -- edge e; -- edge_iterator ei; -- - if (STATE (bb) == BLOCK_ORIGINAL) - continue; - if (STATE (bb) == BLOCK_NEW) - { -- bool initialized_src = false, uninitialized_src = false; -- bb->count = profile_count::zero (); -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- if (e->count ().initialized_p ()) -- { -- bb->count += e->count (); -- initialized_src = true; -- } -- else -- uninitialized_src = true; -- } -- /* When some edges are missing with read profile, this is -- most likely because RTL expansion introduced loop. -- When profile is guessed we may have BB that is reachable -- from unlikely path as well as from normal path. -- -- TODO: We should handle loops created during BB expansion -- correctly here. For now we assume all those loop to cycle -- precisely once. */ -- if (!initialized_src -- || (uninitialized_src -- && profile_status_for_fn (cfun) < PROFILE_GUESSED)) -- bb->count = profile_count::uninitialized (); -+ update_profile_for_new_sub_basic_block (bb); -+ continue; - } -- /* If nothing changed, there is no need to create new BBs. */ -- else if (EDGE_COUNT (bb->succs) == n_succs[bb->index]) -+ /* If nothing changed, there is no need to create new BBs. */ -+ if (EDGE_COUNT (bb->succs) == n_succs[bb->index]) - { - /* In rare occassions RTL expansion might have mistakely assigned - a probabilities different from what is in CFG. This happens -@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks) - update_br_prob_note (bb); - continue; - } -- - compute_outgoing_frequencies (bb); - } - - FOR_EACH_BB_FN (bb, cfun) - SET_STATE (bb, 0); - } -+ -+/* Like find_many_sub_basic_blocks, but look only within BB. 
*/ -+ -+void -+find_sub_basic_blocks (basic_block bb) -+{ -+ basic_block end_bb = bb->next_bb; -+ find_bb_boundaries (bb); -+ if (bb->next_bb == end_bb) -+ return; -+ -+ /* Re-scan and wire in all edges. This expects simple (conditional) -+ jumps at the end of each new basic blocks. */ -+ make_edges (bb, end_bb->prev_bb, 1); -+ -+ /* Update branch probabilities. Expect only (un)conditional jumps -+ to be created with only the forward edges. */ -+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT) -+ { -+ compute_outgoing_frequencies (bb); -+ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb) -+ update_profile_for_new_sub_basic_block (bb); -+ } -+} -diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h -index 85145da7f..53543bb75 100644 ---- a/gcc/cfgbuild.h -+++ b/gcc/cfgbuild.h -@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *); - extern bool control_flow_insn_p (const rtx_insn *); - extern void rtl_make_eh_edge (sbitmap, basic_block, rtx); - extern void find_many_sub_basic_blocks (sbitmap); -+extern void find_sub_basic_blocks (basic_block); - - #endif /* GCC_CFGBUILD_H */ -diff --git a/gcc/function.cc b/gcc/function.cc -index ddab43ca4..f4fc211a0 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void) - && returnjump_p (BB_END (e->src))) - e->flags &= ~EDGE_FALLTHRU; - } -+ -+ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq)); - } - else if (next_active_insn (BB_END (exit_fallthru_edge->src))) - { -@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void) - set_insn_locations (seq, epilogue_location); - - emit_insn_before (seq, insn); -+ -+ find_sub_basic_blocks (BLOCK_FOR_INSN (insn)); - } - } - --- -2.33.0 - diff --git a/0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch b/0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch deleted file mode 100644 index b9e9c93..0000000 --- a/0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch +++ 
/dev/null @@ -1,709 +0,0 @@ -From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 17 Oct 2023 23:46:33 +0100 -Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register - save order - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0 - -aarch64_save/restore_callee_saves looped over registers in register -number order. This in turn meant that we could only use LDP and STP -for registers that were consecutive both number-wise and -offset-wise (after unsaved registers are excluded). - -This patch instead builds lists of the registers that we've decided to -save, in offset order. We can then form LDP/STP pairs regardless of -register number order, which in turn means that we can put the LR save -slot first without losing LDP/STP opportunities. - -gcc/ - * config/aarch64/aarch64.h (aarch64_frame): Add vectors that - store the list saved GPRs, FPRs and predicate registers. - * config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize - the lists of saved registers. Use them to choose push candidates. - Invalidate pop candidates if we're not going to do a pop. - (aarch64_next_callee_save): Delete. - (aarch64_save_callee_saves): Take a list of registers, - rather than a range. Make !skip_wb select only write-back - candidates. - (aarch64_expand_prologue): Update calls accordingly. - (aarch64_restore_callee_saves): Take a list of registers, - rather than a range. Always skip pop candidates. Also skip - LR if shadow call stacks are enabled. - (aarch64_expand_epilogue): Update calls accordingly. - -gcc/testsuite/ - * gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores - to happen in offset order. - * gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise. - * gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise. - * gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise. - * gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise. 
- * gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 203 +++++++++--------- - gcc/config/aarch64/aarch64.h | 9 +- - .../aarch64/sve/pcs/stack_clash_2.c | 6 +- - .../aarch64/sve/pcs/stack_clash_2_1024.c | 6 +- - .../aarch64/sve/pcs/stack_clash_2_128.c | 6 +- - .../aarch64/sve/pcs/stack_clash_2_2048.c | 6 +- - .../aarch64/sve/pcs/stack_clash_2_256.c | 6 +- - .../aarch64/sve/pcs/stack_clash_2_512.c | 6 +- - 8 files changed, 128 insertions(+), 120 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 8d4dd2891..e10c9d763 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p () - static void - aarch64_layout_frame (void) - { -- int regno, last_fp_reg = INVALID_REGNUM; -+ unsigned regno, last_fp_reg = INVALID_REGNUM; - machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); - poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); - bool frame_related_fp_reg_p = false; - aarch64_frame &frame = cfun->machine->frame; - poly_int64 top_of_locals = -1; - -+ vec_safe_truncate (frame.saved_gprs, 0); -+ vec_safe_truncate (frame.saved_fprs, 0); -+ vec_safe_truncate (frame.saved_prs, 0); -+ - frame.emit_frame_chain = aarch64_needs_frame_chain (); - - /* Adjust the outgoing arguments size if required. Keep it in sync with what -@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void) - for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) - if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - { -+ vec_safe_push (frame.saved_prs, regno); - if (frame.sve_save_and_probe == INVALID_REGNUM) - frame.sve_save_and_probe = regno; - frame.reg_offset[regno] = offset; -@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void) - If we don't have any vector registers to save, and we know how - big the predicate save area is, we can just round it up to the - next 16-byte boundary. 
*/ -- if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ()) -+ if (last_fp_reg == INVALID_REGNUM && offset.is_constant ()) - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - else - { -@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void) - } - - /* If we need to save any SVE vector registers, add them next. */ -- if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) -+ if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - { -+ vec_safe_push (frame.saved_fprs, regno); - if (frame.sve_save_and_probe == INVALID_REGNUM) - frame.sve_save_and_probe = regno; - frame.reg_offset[regno] = offset; -@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void) - - auto allocate_gpr_slot = [&](unsigned int regno) - { -- if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -- frame.hard_fp_save_and_probe = regno; -+ vec_safe_push (frame.saved_gprs, regno); - frame.reg_offset[regno] = offset; -- if (frame.wb_push_candidate1 == INVALID_REGNUM) -- frame.wb_push_candidate1 = regno; -- else if (frame.wb_push_candidate2 == INVALID_REGNUM) -- frame.wb_push_candidate2 = regno; - offset += UNITS_PER_WORD; - }; - -@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - { -- if (frame.hard_fp_save_and_probe == INVALID_REGNUM) -- frame.hard_fp_save_and_probe = regno; -+ vec_safe_push (frame.saved_fprs, regno); - /* If there is an alignment gap between integer and fp callee-saves, - allocate the last fp register to it if possible. 
*/ - if (regno == last_fp_reg -@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void) - } - - frame.reg_offset[regno] = offset; -- if (frame.wb_push_candidate1 == INVALID_REGNUM) -- frame.wb_push_candidate1 = regno; -- else if (frame.wb_push_candidate2 == INVALID_REGNUM -- && frame.wb_push_candidate1 >= V0_REGNUM) -- frame.wb_push_candidate2 = regno; - offset += vector_save_size; - } - - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -- - auto saved_regs_size = offset - frame.bytes_below_saved_regs; -- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size) -- || (frame.hard_fp_save_and_probe != INVALID_REGNUM -- && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe], -- frame.bytes_below_hard_fp))); -+ -+ array_slice push_regs = (!vec_safe_is_empty (frame.saved_gprs) -+ ? frame.saved_gprs -+ : frame.saved_fprs); -+ if (!push_regs.empty () -+ && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp)) -+ { -+ frame.hard_fp_save_and_probe = push_regs[0]; -+ frame.wb_push_candidate1 = push_regs[0]; -+ if (push_regs.size () > 1) -+ frame.wb_push_candidate2 = push_regs[1]; -+ } -+ else -+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)); - - /* With stack-clash, a register must be saved in non-leaf functions. - The saving of the bottommost register counts as an implicit probe, -@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void) - + frame.sve_callee_adjust - + frame.final_adjust, frame.frame_size)); - -- if (!frame.emit_frame_chain && frame.callee_adjust == 0) -+ if (frame.callee_adjust == 0) - { -- /* We've decided not to associate any register saves with the initial -- stack allocation. */ -- frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM; -- frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM; -+ /* We've decided not to do a "real" push and pop. However, -+ setting up the frame chain is treated as being essentially -+ a multi-instruction push. 
*/ -+ frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM; -+ if (!frame.emit_frame_chain) -+ frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM; - } - - frame.laid_out = true; -@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno) - return known_ge (cfun->machine->frame.reg_offset[regno], 0); - } - --/* Return the next register up from REGNO up to LIMIT for the callee -- to save. */ -- --static unsigned --aarch64_next_callee_save (unsigned regno, unsigned limit) --{ -- while (regno <= limit && !aarch64_register_saved_on_entry (regno)) -- regno ++; -- return regno; --} -- - /* Push the register number REGNO of mode MODE to the stack with write-back - adjusting the stack by ADJUSTMENT. */ - -@@ -9424,41 +9419,46 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, - add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); - } - --/* Emit code to save the callee-saved registers from register number START -- to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP -- bytes above the bottom of the static frame. Skip any write-back -- candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard -- frame pointer has been set up. */ -+/* Emit code to save the callee-saved registers in REGS. Skip any -+ write-back candidates if SKIP_WB is true, otherwise consider only -+ write-back candidates. -+ -+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom -+ of the static frame. HARD_FP_VALID_P is true if the hard frame pointer -+ has been set up. 
*/ - - static void - aarch64_save_callee_saves (poly_int64 bytes_below_sp, -- unsigned start, unsigned limit, bool skip_wb, -+ array_slice regs, bool skip_wb, - bool hard_fp_valid_p) - { - aarch64_frame &frame = cfun->machine->frame; - rtx_insn *insn; -- unsigned regno; -- unsigned regno2; - rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; - -- for (regno = aarch64_next_callee_save (start, limit); -- regno <= limit; -- regno = aarch64_next_callee_save (regno + 1, limit)) -+ auto skip_save_p = [&](unsigned int regno) -+ { -+ if (cfun->machine->reg_is_wrapped_separately[regno]) -+ return true; -+ -+ if (skip_wb == (regno == frame.wb_push_candidate1 -+ || regno == frame.wb_push_candidate2)) -+ return true; -+ -+ return false; -+ }; -+ -+ for (unsigned int i = 0; i < regs.size (); ++i) - { -- rtx reg, mem; -+ unsigned int regno = regs[i]; - poly_int64 offset; - bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - -- if (skip_wb -- && (regno == frame.wb_push_candidate1 -- || regno == frame.wb_push_candidate2)) -- continue; -- -- if (cfun->machine->reg_is_wrapped_separately[regno]) -+ if (skip_save_p (regno)) - continue; - - machine_mode mode = aarch64_reg_save_mode (regno); -- reg = gen_rtx_REG (mode, regno); -+ rtx reg = gen_rtx_REG (mode, regno); - offset = frame.reg_offset[regno] - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; -@@ -9485,12 +9485,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - } - offset -= fp_offset; - } -- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); -+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - bool need_cfa_note_p = (base_rtx != stack_pointer_rtx); - -+ unsigned int regno2; - if (!aarch64_sve_mode_p (mode) -- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit -- && !cfun->machine->reg_is_wrapped_separately[regno2] -+ && i + 1 < regs.size () -+ && (regno2 = regs[i + 1], !skip_save_p (regno2)) - && known_eq (GET_MODE_SIZE 
(mode), - frame.reg_offset[regno2] - frame.reg_offset[regno])) - { -@@ -9516,6 +9517,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - } - - regno = regno2; -+ ++i; - } - else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - { -@@ -9533,49 +9535,57 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - } - } - --/* Emit code to restore the callee registers from register number START -- up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP -- bytes above the bottom of the static frame. Skip any write-back -- candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE -- notes into CFI_OPS. */ -+/* Emit code to restore the callee registers in REGS, ignoring pop candidates -+ and any other registers that are handled separately. Write the appropriate -+ REG_CFA_RESTORE notes into CFI_OPS. -+ -+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom -+ of the static frame. */ - - static void --aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, -- unsigned limit, bool skip_wb, rtx *cfi_ops) -+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, -+ array_slice regs, rtx *cfi_ops) - { - aarch64_frame &frame = cfun->machine->frame; -- unsigned regno; -- unsigned regno2; - poly_int64 offset; - rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; - -- for (regno = aarch64_next_callee_save (start, limit); -- regno <= limit; -- regno = aarch64_next_callee_save (regno + 1, limit)) -+ auto skip_restore_p = [&](unsigned int regno) - { -- bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - if (cfun->machine->reg_is_wrapped_separately[regno]) -- continue; -+ return true; -+ -+ if (regno == frame.wb_pop_candidate1 -+ || regno == frame.wb_pop_candidate2) -+ return true; - -- rtx reg, mem; -+ /* The shadow call stack code restores LR separately. 
*/ -+ if (frame.is_scs_enabled && regno == LR_REGNUM) -+ return true; - -- if (skip_wb -- && (regno == frame.wb_pop_candidate1 -- || regno == frame.wb_pop_candidate2)) -+ return false; -+ }; -+ -+ for (unsigned int i = 0; i < regs.size (); ++i) -+ { -+ unsigned int regno = regs[i]; -+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); -+ if (skip_restore_p (regno)) - continue; - - machine_mode mode = aarch64_reg_save_mode (regno); -- reg = gen_rtx_REG (mode, regno); -+ rtx reg = gen_rtx_REG (mode, regno); - offset = frame.reg_offset[regno] - bytes_below_sp; - rtx base_rtx = stack_pointer_rtx; - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, - offset, ptrue); -- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); -+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - -+ unsigned int regno2; - if (!aarch64_sve_mode_p (mode) -- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit -- && !cfun->machine->reg_is_wrapped_separately[regno2] -+ && i + 1 < regs.size () -+ && (regno2 = regs[i + 1], !skip_restore_p (regno2)) - && known_eq (GET_MODE_SIZE (mode), - frame.reg_offset[regno2] - frame.reg_offset[regno])) - { -@@ -9588,6 +9598,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start, - - *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); - regno = regno2; -+ ++i; - } - else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem)); -@@ -10409,13 +10420,10 @@ aarch64_expand_prologue (void) - - frame.bytes_above_hard_fp); - gcc_assert (known_ge (chain_offset, 0)); - -+ gcc_assert (reg1 == R29_REGNUM && reg2 == R30_REGNUM); - if (callee_adjust == 0) -- { -- reg1 = R29_REGNUM; -- reg2 = R30_REGNUM; -- aarch64_save_callee_saves (bytes_below_sp, reg1, reg2, -- false, false); -- } -+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, -+ false, false); - else - 
gcc_assert (known_eq (chain_offset, 0)); - aarch64_add_offset (Pmode, hard_frame_pointer_rtx, -@@ -10453,8 +10461,7 @@ aarch64_expand_prologue (void) - aarch64_emit_stack_tie (hard_frame_pointer_rtx); - } - -- aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM, -- callee_adjust != 0 || emit_frame_chain, -+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true, - emit_frame_chain); - if (maybe_ne (sve_callee_adjust, 0)) - { -@@ -10465,10 +10472,9 @@ aarch64_expand_prologue (void) - !frame_pointer_needed, false); - bytes_below_sp -= sve_callee_adjust; - } -- aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM, -- false, emit_frame_chain); -- aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0 || emit_frame_chain, -+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_prs, true, -+ emit_frame_chain); -+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_fprs, true, - emit_frame_chain); - - /* We may need to probe the final adjustment if it is larger than the guard -@@ -10514,8 +10520,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp; - unsigned reg1 = frame.wb_pop_candidate1; - unsigned reg2 = frame.wb_pop_candidate2; -- unsigned int last_gpr = (frame.is_scs_enabled -- ? R29_REGNUM : R30_REGNUM); - rtx cfi_ops = NULL; - rtx_insn *insn; - /* A stack clash protection prologue may not have left EP0_REGNUM or -@@ -10579,10 +10583,8 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - - /* Restore the vector registers before the predicate registers, - so that we can use P4 as a temporary for big-endian SVE frames. 
*/ -- aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0, &cfi_ops); -- aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM, -- false, &cfi_ops); -+ aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops); -+ aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops); - if (maybe_ne (sve_callee_adjust, 0)) - aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); - -@@ -10590,8 +10592,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - restore x30, we don't need to restore x30 again in the traditional - way. */ - aarch64_restore_callee_saves (final_adjust + sve_callee_adjust, -- R0_REGNUM, last_gpr, -- callee_adjust != 0, &cfi_ops); -+ frame.saved_gprs, &cfi_ops); - - if (need_barrier_p) - aarch64_emit_stack_tie (stack_pointer_rtx); -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 292ef2eec..1591cde8b 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -787,7 +787,7 @@ extern enum aarch64_processor aarch64_tune; - - #define DEFAULT_PCC_STRUCT_RETURN 0 - --#ifdef HAVE_POLY_INT_H -+#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H) - struct GTY (()) aarch64_frame - { - /* The offset from the bottom of the static frame (the bottom of the -@@ -795,6 +795,13 @@ struct GTY (()) aarch64_frame - needed. */ - poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; - -+ /* The list of GPRs, FPRs and predicate registers that have nonnegative -+ entries in reg_offset. The registers are listed in order of -+ increasing offset (rather than increasing register number). */ -+ vec *saved_gprs; -+ vec *saved_fprs; -+ vec *saved_prs; -+ - /* The number of extra stack bytes taken up by register varargs. - This area is allocated by the callee at the very top of the - frame. 
This value is rounded up to a multiple of -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c -index 4622a1eed..bbb45d266 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c -@@ -215,9 +215,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr p4, \[sp\] - ** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -283,9 +283,9 @@ test_9 (int n) - ** addvl sp, x29, #-1 - ** ldr p4, \[sp\] - ** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -319,9 +319,9 @@ test_10 (int n) - ** addvl sp, x29, #-1 - ** ldr p4, \[sp\] - ** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c -index e31200fc2..9437c7a85 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c -@@ -176,9 +176,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr z16, \[sp\] - ** add sp, sp, #?128 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -234,9 +234,9 @@ test_9 (int n) - ** sub sp, x29, #128 - ** ldr z16, \[sp\] - ** add sp, sp, #?128 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -268,9 +268,9 @@ test_10 (int n) - ** sub 
sp, x29, #128 - ** ldr z16, \[sp\] - ** add sp, sp, #?128 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c -index 41193b411..b4e1627fa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c -@@ -176,9 +176,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr p4, \[sp\] - ** add sp, sp, #?16 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -234,9 +234,9 @@ test_9 (int n) - ** sub sp, x29, #16 - ** ldr p4, \[sp\] - ** add sp, sp, #?16 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -267,9 +267,9 @@ test_10 (int n) - ** sub sp, x29, #16 - ** ldr p4, \[sp\] - ** add sp, sp, #?16 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c -index f63751678..921209379 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c -@@ -176,9 +176,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr z16, \[sp\] - ** add sp, sp, #?256 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -234,9 +234,9 @@ test_9 (int n) - ** sub sp, x29, #256 - ** ldr z16, \[sp\] - ** add sp, sp, #?256 -+** ldp 
x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -268,9 +268,9 @@ test_10 (int n) - ** sub sp, x29, #256 - ** ldr z16, \[sp\] - ** add sp, sp, #?256 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c -index 6bcbb5772..bd8bef0f0 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c -@@ -176,9 +176,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr z16, \[sp\] - ** add sp, sp, #?32 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -234,9 +234,9 @@ test_9 (int n) - ** sub sp, x29, #32 - ** ldr z16, \[sp\] - ** add sp, sp, #?32 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -267,9 +267,9 @@ test_10 (int n) - ** sub sp, x29, #32 - ** ldr z16, \[sp\] - ** add sp, sp, #?32 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c -index dc7df8e6b..2c76ccecd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c -@@ -176,9 +176,9 @@ test_7 (void) - ** add sp, sp, #?16 - ** ldr z16, \[sp\] - ** add sp, sp, #?64 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 
32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -234,9 +234,9 @@ test_9 (int n) - ** sub sp, x29, #64 - ** ldr z16, \[sp\] - ** add sp, sp, #?64 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** mov x12, #?4144 - ** add sp, sp, x12 - ** ret -@@ -268,9 +268,9 @@ test_10 (int n) - ** sub sp, x29, #64 - ** ldr z16, \[sp\] - ** add sp, sp, #?64 -+** ldp x29, x30, \[sp\] - ** ldp x24, x25, \[sp, 16\] - ** ldr x26, \[sp, 32\] --** ldp x29, x30, \[sp\] - ** add sp, sp, #?3008 - ** add sp, sp, #?126976 - ** ret --- -2.33.0 - diff --git a/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch b/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch deleted file mode 100644 index 25a3b36..0000000 --- a/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch +++ /dev/null @@ -1,107 +0,0 @@ -From ccc3ca614bbaa242fe25ec82b903dfcac03fe2de Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 17 Oct 2023 23:46:33 +0100 -Subject: [PATCH 105/157] [Backport][SME] aarch64: Put LR save slot first in - more cases - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666 - -Now that the prologue and epilogue code iterates over saved -registers in offset order, we can put the LR save slot first -without compromising LDP/STP formation. - -This isn't worthwhile when shadow call stacks are enabled, since the -first two registers are also push/pop candidates, and LR cannot be -popped when shadow call stacks are enabled. (LR is instead loaded -first and compared against the shadow stack's value.) - -But otherwise, it seems better to put the LR save slot first, -to reduce unnecessary variation with the layout for stack clash -protection. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make - the position of the LR save slot dependent on stack clash - protection unless shadow call stacks are enabled. 
- -gcc/testsuite/ - * gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19. - * gcc.target/aarch64/test_frame_4.c: Likewise. - * gcc.target/aarch64/test_frame_7.c: Likewise. - * gcc.target/aarch64/test_frame_10.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 2 +- - gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++-- - gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 4 ++-- - gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 4 ++-- - gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 4 ++-- - 5 files changed, 9 insertions(+), 9 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index e10c9d763..1c127192d 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8920,7 +8920,7 @@ aarch64_layout_frame (void) - allocate_gpr_slot (R29_REGNUM); - allocate_gpr_slot (R30_REGNUM); - } -- else if (flag_stack_clash_protection -+ else if ((flag_stack_clash_protection || !frame.is_scs_enabled) - && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED)) - /* Put the LR save slot first, since it makes a good choice of probe - for stack clash purposes. 
The idea is that the link register usually -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -index c19505082..c54ab2d0c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c -@@ -14,6 +14,6 @@ - t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10]) - t_frame_run (test10) - --/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */ --/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */ -+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */ -+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -index 7e5df84cf..0d715314c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c -@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19") - t_frame_run (test2) - - --/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ --/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */ -+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -index ed13487a0..b41229c42 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c -@@ -13,6 +13,6 @@ - t_frame_pattern (test4, 400, "x19") - t_frame_run (test4) - --/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 
1 } } */ --/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */ -+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */ -+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */ - -diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -index 964527949..5702656a5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c -@@ -13,6 +13,6 @@ - t_frame_pattern (test7, 700, "x19") - t_frame_run (test7) - --/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */ --/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */ -+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp]" 1 } } */ -+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */ - --- -2.33.0 - diff --git a/0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch b/0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch deleted file mode 100644 index d0a23d2..0000000 --- a/0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch +++ /dev/null @@ -1,3270 +0,0 @@ -From 88a41bc24eb793eee27aa9f4ef6b763b3c3e76e6 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:25 +0000 -Subject: [PATCH 106/157] [Backport][SME] aarch64: Switch PSTATE.SM around - calls - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd8090f40079fa41ee58d9f76b2e50ed4f95c6bf - -This patch adds support for switching to the appropriate SME mode -for each call. Switching to streaming mode requires an SMSTART SM -instruction and switching to non-streaming mode requires an SMSTOP SM -instruction. If the call is being made from streaming-compatible code, -these switches are conditional on the current mode being the opposite -of the one that the call needs. 
- -Since changing PSTATE.SM changes the vector length and effectively -changes the ISA, the code to do the switching has to be emitted late. -The patch does this using a new pass that runs next to late prologue/ -epilogue insertion. (It doesn't use md_reorg because later additions -need the CFG.) - -If a streaming-compatible function needs to switch mode for a call, -it must restore the original mode afterwards. The old mode must -therefore be available immediately after the call. The easiest -way of ensuring this is to force the use of a hard frame pointer -and ensure that the old state is saved at an in-range offset -from there. - -Changing modes clobbers the Z and P registers, so we need to -save and restore live Z and P state around each mode switch. -However, mode switches are not expected to be performance -critical, so it seemed better to err on the side of being -correct rather than trying to optimise the save and restore -with surrounding code. - -gcc/ - * config/aarch64/aarch64-passes.def - (pass_late_thread_prologue_and_epilogue): New pass. - * config/aarch64/aarch64-sme.md: New file. - * config/aarch64/aarch64.md: Include it. - (*tb1): Rename to... - (@aarch64_tb): ...this. - (call, call_value, sibcall, sibcall_value): Don't require operand 2 - to be a CONST_INT. - * config/aarch64/aarch64-protos.h (aarch64_emit_call_insn): Return - the insn. - (make_pass_switch_sm_state): Declare. - * config/aarch64/aarch64.h (TARGET_STREAMING_COMPATIBLE): New macro. - (CALL_USED_REGISTER): Mark VG as call-preserved. - (aarch64_frame::old_svcr_offset): New member variable. - (machine_function::call_switches_sm_state): Likewise. - (CUMULATIVE_ARGS::num_sme_mode_switch_args): Likewise. - (CUMULATIVE_ARGS::sme_mode_switch_args): Likewise. - * config/aarch64/aarch64.cc: Include tree-pass.h and cfgbuild.h. - (aarch64_cfun_incoming_pstate_sm): New function. - (aarch64_call_switches_pstate_sm): Likewise. - (aarch64_reg_save_mode): Return DImode for VG_REGNUM. 
- (aarch64_callee_isa_mode): New function. - (aarch64_insn_callee_isa_mode): Likewise. - (aarch64_guard_switch_pstate_sm): Likewise. - (aarch64_switch_pstate_sm): Likewise. - (aarch64_sme_mode_switch_regs): New class. - (aarch64_record_sme_mode_switch_args): New function. - (aarch64_finish_sme_mode_switch_args): Likewise. - (aarch64_function_arg): Handle the end marker by returning a - PARALLEL that contains the ABI cookie that we used previously - alongside the result of aarch64_finish_sme_mode_switch_args. - (aarch64_init_cumulative_args): Initialize num_sme_mode_switch_args. - (aarch64_function_arg_advance): If a call would switch SM state, - record all argument registers that would need to be saved around - the mode switch. - (aarch64_need_old_pstate_sm): New function. - (aarch64_layout_frame): Decide whether the frame needs to store the - incoming value of PSTATE.SM and allocate a save slot for it if so. - If a function switches SME state, arrange to save the old value - of the DWARF VG register. Handle the case where this is the only - register save slot above the FP. - (aarch64_save_callee_saves): Handles saves of the DWARF VG register. - (aarch64_get_separate_components): Prevent such saves from being - shrink-wrapped. - (aarch64_old_svcr_mem): New function. - (aarch64_read_old_svcr): Likewise. - (aarch64_guard_switch_pstate_sm): Likewise. - (aarch64_expand_prologue): Handle saves of the DWARF VG register. - Initialize any SVCR save slot. - (aarch64_expand_call): Allow the cookie to be PARALLEL that contains - both the UNSPEC_CALLEE_ABI value and a list of registers that need - to be preserved across a change to PSTATE.SM. If the call does - involve such a change to PSTATE.SM, record the registers that - would be clobbered by this process. Also emit an instruction - to mark the temporary change in VG. Update call_switches_pstate_sm. - (aarch64_emit_call_insn): Return the emitted instruction. - (aarch64_frame_pointer_required): New function. 
- (aarch64_conditional_register_usage): Prevent VG_REGNUM from being - treated as a register operand. - (aarch64_switch_pstate_sm_for_call): New function. - (pass_data_switch_pstate_sm): New pass variable. - (pass_switch_pstate_sm): New pass class. - (make_pass_switch_pstate_sm): New function. - (TARGET_FRAME_POINTER_REQUIRED): Define. - * config/aarch64/t-aarch64 (s-check-sve-md): Add aarch64-sme.md. - -gcc/testsuite/ - * gcc.target/aarch64/sme/call_sm_switch_1.c: New test. - * gcc.target/aarch64/sme/call_sm_switch_2.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_4.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_6.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_7.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_9.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_10.c: Likewise. ---- - gcc/config/aarch64/aarch64-passes.def | 1 + - gcc/config/aarch64/aarch64-protos.h | 3 +- - gcc/config/aarch64/aarch64-sme.md | 171 ++++ - gcc/config/aarch64/aarch64.cc | 883 +++++++++++++++++- - gcc/config/aarch64/aarch64.h | 25 +- - gcc/config/aarch64/aarch64.md | 13 +- - gcc/config/aarch64/t-aarch64 | 5 +- - .../gcc.target/aarch64/sme/call_sm_switch_1.c | 233 +++++ - .../aarch64/sme/call_sm_switch_10.c | 37 + - .../gcc.target/aarch64/sme/call_sm_switch_2.c | 43 + - .../gcc.target/aarch64/sme/call_sm_switch_3.c | 166 ++++ - .../gcc.target/aarch64/sme/call_sm_switch_4.c | 43 + - .../gcc.target/aarch64/sme/call_sm_switch_5.c | 318 +++++++ - .../gcc.target/aarch64/sme/call_sm_switch_6.c | 45 + - .../gcc.target/aarch64/sme/call_sm_switch_7.c | 516 ++++++++++ - .../gcc.target/aarch64/sme/call_sm_switch_8.c | 87 ++ - .../gcc.target/aarch64/sme/call_sm_switch_9.c | 103 ++ - 17 files changed, 2668 insertions(+), 24 deletions(-) - create mode 100644 gcc/config/aarch64/aarch64-sme.md - create 
mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c - -diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def -index a2babc112..c6cbbf2ef 100644 ---- a/gcc/config/aarch64/aarch64-passes.def -+++ b/gcc/config/aarch64/aarch64-passes.def -@@ -20,6 +20,7 @@ - - INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering); - INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); -+INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm); - INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance); - INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti); - INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion); -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 9b03410dc..737f47026 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -913,7 +913,7 @@ void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, - const_tree, unsigned, bool = false); - void aarch64_init_expanders (void); - void aarch64_init_simd_builtins (void); --void aarch64_emit_call_insn (rtx); -+rtx_call_insn *aarch64_emit_call_insn (rtx); - void aarch64_register_pragmas (void); - void 
aarch64_relayout_simd_types (void); - void aarch64_reset_previous_fndecl (void); -@@ -1055,6 +1055,7 @@ rtl_opt_pass *make_pass_track_speculation (gcc::context *); - rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *); - rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt); - rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt); -+rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt); - - poly_uint64 aarch64_regmode_natural_size (machine_mode); - -diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md -new file mode 100644 -index 000000000..52427b4f1 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sme.md -@@ -0,0 +1,171 @@ -+;; Machine description for AArch64 SME. -+;; Copyright (C) 2023 Free Software Foundation, Inc. -+;; -+;; This file is part of GCC. -+;; -+;; GCC is free software; you can redistribute it and/or modify it -+;; under the terms of the GNU General Public License as published by -+;; the Free Software Foundation; either version 3, or (at your option) -+;; any later version. -+;; -+;; GCC is distributed in the hope that it will be useful, but -+;; WITHOUT ANY WARRANTY; without even the implied warranty of -+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+;; General Public License for more details. -+;; -+;; You should have received a copy of the GNU General Public License -+;; along with GCC; see the file COPYING3. If not see -+;; . -+ -+;; The file is organised into the following sections (search for the full -+;; line): -+;; -+;; == State management -+;; ---- Test current state -+;; ---- PSTATE.SM management -+ -+;; ========================================================================= -+;; == State management -+;; ========================================================================= -+;; -+;; Many of the instructions in this section are only valid when SME is -+;; present. 
However, they don't have a TARGET_SME condition since -+;; (a) they are only emitted under direct control of aarch64 code and -+;; (b) they are sometimes used conditionally, particularly in streaming- -+;; compatible code. -+;; -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Test current state -+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [ -+ UNSPEC_OLD_VG_SAVED -+ UNSPEC_UPDATE_VG -+ UNSPEC_GET_SME_STATE -+ UNSPEC_READ_SVCR -+]) -+ -+;; A marker instruction to say that the old value of the DWARF VG register -+;; has been saved to the stack, for CFI purposes. Operand 0 is the old -+;; value of the register and operand 1 is the save slot. -+(define_insn "aarch64_old_vg_saved" -+ [(set (reg:DI VG_REGNUM) -+ (unspec:DI [(match_operand 0) -+ (match_operand 1)] UNSPEC_OLD_VG_SAVED))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; A marker to indicate places where a call temporarily changes VG. 
-+(define_insn "aarch64_update_vg" -+ [(set (reg:DI VG_REGNUM) -+ (unspec:DI [(reg:DI VG_REGNUM)] UNSPEC_UPDATE_VG))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+(define_insn "aarch64_get_sme_state" -+ [(set (reg:TI R0_REGNUM) -+ (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE)) -+ (clobber (reg:DI R16_REGNUM)) -+ (clobber (reg:DI R17_REGNUM)) -+ (clobber (reg:DI R18_REGNUM)) -+ (clobber (reg:DI R30_REGNUM)) -+ (clobber (reg:CC CC_REGNUM))] -+ "" -+ "bl\t__arm_sme_state" -+) -+ -+(define_insn "aarch64_read_svcr" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI [(const_int 0)] UNSPEC_READ_SVCR))] -+ "" -+ "mrs\t%0, svcr" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- PSTATE.SM management -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SMSTART SM -+;; - SMSTOP SM -+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [ -+ UNSPEC_SMSTART_SM -+ UNSPEC_SMSTOP_SM -+]) -+ -+;; Turn on streaming mode. This clobbers all SVE state. -+;; -+;; Depend on VG_REGNUM to ensure that the VG save slot has already been -+;; initialized. 
-+(define_insn "aarch64_smstart_sm" -+ [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_SM) -+ (use (reg:DI VG_REGNUM)) -+ (clobber (reg:V4x16QI V0_REGNUM)) -+ (clobber (reg:V4x16QI V4_REGNUM)) -+ (clobber (reg:V4x16QI V8_REGNUM)) -+ (clobber (reg:V4x16QI V12_REGNUM)) -+ (clobber (reg:V4x16QI V16_REGNUM)) -+ (clobber (reg:V4x16QI V20_REGNUM)) -+ (clobber (reg:V4x16QI V24_REGNUM)) -+ (clobber (reg:V4x16QI V28_REGNUM)) -+ (clobber (reg:VNx16BI P0_REGNUM)) -+ (clobber (reg:VNx16BI P1_REGNUM)) -+ (clobber (reg:VNx16BI P2_REGNUM)) -+ (clobber (reg:VNx16BI P3_REGNUM)) -+ (clobber (reg:VNx16BI P4_REGNUM)) -+ (clobber (reg:VNx16BI P5_REGNUM)) -+ (clobber (reg:VNx16BI P6_REGNUM)) -+ (clobber (reg:VNx16BI P7_REGNUM)) -+ (clobber (reg:VNx16BI P8_REGNUM)) -+ (clobber (reg:VNx16BI P9_REGNUM)) -+ (clobber (reg:VNx16BI P10_REGNUM)) -+ (clobber (reg:VNx16BI P11_REGNUM)) -+ (clobber (reg:VNx16BI P12_REGNUM)) -+ (clobber (reg:VNx16BI P13_REGNUM)) -+ (clobber (reg:VNx16BI P14_REGNUM)) -+ (clobber (reg:VNx16BI P15_REGNUM))] -+ "" -+ "smstart\tsm" -+) -+ -+;; Turn off streaming mode. This clobbers all SVE state. -+;; -+;; Depend on VG_REGNUM to ensure that the VG save slot has already been -+;; initialized. 
-+(define_insn "aarch64_smstop_sm" -+ [(unspec_volatile [(const_int 0)] UNSPEC_SMSTOP_SM) -+ (use (reg:DI VG_REGNUM)) -+ (clobber (reg:V4x16QI V0_REGNUM)) -+ (clobber (reg:V4x16QI V4_REGNUM)) -+ (clobber (reg:V4x16QI V8_REGNUM)) -+ (clobber (reg:V4x16QI V12_REGNUM)) -+ (clobber (reg:V4x16QI V16_REGNUM)) -+ (clobber (reg:V4x16QI V20_REGNUM)) -+ (clobber (reg:V4x16QI V24_REGNUM)) -+ (clobber (reg:V4x16QI V28_REGNUM)) -+ (clobber (reg:VNx16BI P0_REGNUM)) -+ (clobber (reg:VNx16BI P1_REGNUM)) -+ (clobber (reg:VNx16BI P2_REGNUM)) -+ (clobber (reg:VNx16BI P3_REGNUM)) -+ (clobber (reg:VNx16BI P4_REGNUM)) -+ (clobber (reg:VNx16BI P5_REGNUM)) -+ (clobber (reg:VNx16BI P6_REGNUM)) -+ (clobber (reg:VNx16BI P7_REGNUM)) -+ (clobber (reg:VNx16BI P8_REGNUM)) -+ (clobber (reg:VNx16BI P9_REGNUM)) -+ (clobber (reg:VNx16BI P10_REGNUM)) -+ (clobber (reg:VNx16BI P11_REGNUM)) -+ (clobber (reg:VNx16BI P12_REGNUM)) -+ (clobber (reg:VNx16BI P13_REGNUM)) -+ (clobber (reg:VNx16BI P14_REGNUM)) -+ (clobber (reg:VNx16BI P15_REGNUM))] -+ "" -+ "smstop\tsm" -+) -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 1c127192d..82f8e574e 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -82,6 +82,8 @@ - #include "tree-dfa.h" - #include "asan.h" - #include "aarch64-feature-deps.h" -+#include "tree-pass.h" -+#include "cfgbuild.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -4377,6 +4379,26 @@ aarch64_fndecl_isa_mode (const_tree fndecl) - return aarch64_fndecl_pstate_sm (fndecl); - } - -+/* Return the state of PSTATE.SM on entry to the current function. -+ This might be different from the state of PSTATE.SM in the function -+ body. 
*/ -+ -+static aarch64_feature_flags -+aarch64_cfun_incoming_pstate_sm () -+{ -+ return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl)); -+} -+ -+/* Return true if a call from the current function to a function with -+ ISA mode CALLEE_MODE would involve a change to PSTATE.SM around -+ the BL instruction. */ -+ -+static bool -+aarch64_call_switches_pstate_sm (aarch64_feature_flags callee_mode) -+{ -+ return (callee_mode & ~AARCH64_ISA_MODE & AARCH64_FL_SM_STATE) != 0; -+} -+ - /* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ - - static bool -@@ -4400,7 +4422,7 @@ aarch64_emit_cfi_for_reg_p (unsigned int regno) - static machine_mode - aarch64_reg_save_mode (unsigned int regno) - { -- if (GP_REGNUM_P (regno)) -+ if (GP_REGNUM_P (regno) || regno == VG_REGNUM) - return DImode; - - if (FP_REGNUM_P (regno)) -@@ -4459,6 +4481,16 @@ aarch64_callee_abi (rtx cookie) - return function_abis[UINTVAL (cookie) >> AARCH64_NUM_ISA_MODES]; - } - -+/* COOKIE is a CONST_INT from an UNSPEC_CALLEE_ABI rtx. Return the -+ required ISA mode on entry to the callee, which is also the ISA -+ mode on return from the callee. */ -+ -+static aarch64_feature_flags -+aarch64_callee_isa_mode (rtx cookie) -+{ -+ return UINTVAL (cookie) & AARCH64_FL_ISA_MODES; -+} -+ - /* INSN is a call instruction. Return the CONST_INT stored in its - UNSPEC_CALLEE_ABI rtx. */ - -@@ -4481,6 +4513,15 @@ aarch64_insn_callee_abi (const rtx_insn *insn) - return aarch64_callee_abi (aarch64_insn_callee_cookie (insn)); - } - -+/* INSN is a call instruction. Return the required ISA mode on entry to -+ the callee, which is also the ISA mode on return from the callee. */ -+ -+static aarch64_feature_flags -+aarch64_insn_callee_isa_mode (const rtx_insn *insn) -+{ -+ return aarch64_callee_isa_mode (aarch64_insn_callee_cookie (insn)); -+} -+ - /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves - the lower 64 bits of a 128-bit register. 
Tell the compiler the callee - clobbers the top 64 bits when restoring the bottom 64 bits. */ -@@ -6645,6 +6686,437 @@ aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p, - temp1, temp2, frame_related_p, emit_move_imm); - } - -+/* A streaming-compatible function needs to switch temporarily to the known -+ PSTATE.SM mode described by LOCAL_MODE. The low bit of OLD_SVCR contains -+ the runtime state of PSTATE.SM in the streaming-compatible code, before -+ the start of the switch to LOCAL_MODE. -+ -+ Emit instructions to branch around the mode switch if PSTATE.SM already -+ matches LOCAL_MODE. Return the label that the branch jumps to. */ -+ -+static rtx_insn * -+aarch64_guard_switch_pstate_sm (rtx old_svcr, aarch64_feature_flags local_mode) -+{ -+ local_mode &= AARCH64_FL_SM_STATE; -+ gcc_assert (local_mode != 0); -+ auto already_ok_cond = (local_mode & AARCH64_FL_SM_ON ? NE : EQ); -+ auto *label = gen_label_rtx (); -+ auto *jump = emit_jump_insn (gen_aarch64_tb (already_ok_cond, DImode, DImode, -+ old_svcr, const0_rtx, label)); -+ JUMP_LABEL (jump) = label; -+ return label; -+} -+ -+/* Emit code to switch from the PSTATE.SM state in OLD_MODE to the PSTATE.SM -+ state in NEW_MODE. This is known to involve either an SMSTART SM or -+ an SMSTOP SM. */ -+ -+static void -+aarch64_switch_pstate_sm (aarch64_feature_flags old_mode, -+ aarch64_feature_flags new_mode) -+{ -+ old_mode &= AARCH64_FL_SM_STATE; -+ new_mode &= AARCH64_FL_SM_STATE; -+ gcc_assert (old_mode != new_mode); -+ -+ if ((new_mode & AARCH64_FL_SM_ON) -+ || (new_mode == 0 && (old_mode & AARCH64_FL_SM_OFF))) -+ emit_insn (gen_aarch64_smstart_sm ()); -+ else -+ emit_insn (gen_aarch64_smstop_sm ()); -+} -+ -+/* As a side-effect, SMSTART SM and SMSTOP SM clobber the contents of all -+ FP and predicate registers. This class emits code to preserve any -+ necessary registers around the mode switch. 
-+ -+ The class uses four approaches to saving and restoring contents, enumerated -+ by group_type: -+ -+ - GPR: save and restore the contents of FP registers using GPRs. -+ This is used if the FP register contains no more than 64 significant -+ bits. The registers used are FIRST_GPR onwards. -+ -+ - MEM_128: save and restore 128-bit SIMD registers using memory. -+ -+ - MEM_SVE_PRED: save and restore full SVE predicate registers using memory. -+ -+ - MEM_SVE_DATA: save and restore full SVE vector registers using memory. -+ -+ The save slots within each memory group are consecutive, with the -+ MEM_SVE_PRED slots occupying a region below the MEM_SVE_DATA slots. -+ -+ There will only be two mode switches for each use of SME, so they should -+ not be particularly performance-sensitive. It's also rare for SIMD, SVE -+ or predicate registers to be live across mode switches. We therefore -+ don't preallocate the save slots but instead allocate them locally on -+ demand. This makes the code emitted by the class self-contained. */ -+ -+class aarch64_sme_mode_switch_regs -+{ -+public: -+ static const unsigned int FIRST_GPR = R10_REGNUM; -+ -+ void add_reg (machine_mode, unsigned int); -+ void add_call_args (rtx_call_insn *); -+ void add_call_result (rtx_call_insn *); -+ -+ void emit_prologue (); -+ void emit_epilogue (); -+ -+ /* The number of GPRs needed to save FP registers, starting from -+ FIRST_GPR. */ -+ unsigned int num_gprs () { return m_group_count[GPR]; } -+ -+private: -+ enum sequence { PROLOGUE, EPILOGUE }; -+ enum group_type { GPR, MEM_128, MEM_SVE_PRED, MEM_SVE_DATA, NUM_GROUPS }; -+ -+ /* Information about the save location for one FP, SIMD, SVE data, or -+ SVE predicate register. */ -+ struct save_location { -+ /* The register to be saved. */ -+ rtx reg; -+ -+ /* Which group the save location belongs to. */ -+ group_type group; -+ -+ /* A zero-based index of the register within the group. 
*/ -+ unsigned int index; -+ }; -+ -+ unsigned int sve_data_headroom (); -+ rtx get_slot_mem (machine_mode, poly_int64); -+ void emit_stack_adjust (sequence, poly_int64); -+ void emit_mem_move (sequence, const save_location &, poly_int64); -+ -+ void emit_gpr_moves (sequence); -+ void emit_mem_128_moves (sequence); -+ void emit_sve_sp_adjust (sequence); -+ void emit_sve_pred_moves (sequence); -+ void emit_sve_data_moves (sequence); -+ -+ /* All save locations, in no particular order. */ -+ auto_vec m_save_locations; -+ -+ /* The number of registers in each group. */ -+ unsigned int m_group_count[NUM_GROUPS] = {}; -+}; -+ -+/* Record that (reg:MODE REGNO) needs to be preserved around the mode -+ switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::add_reg (machine_mode mode, unsigned int regno) -+{ -+ if (!FP_REGNUM_P (regno) && !PR_REGNUM_P (regno)) -+ return; -+ -+ unsigned int end_regno = end_hard_regno (mode, regno); -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ gcc_assert ((vec_flags & VEC_STRUCT) || end_regno == regno + 1); -+ for (; regno < end_regno; regno++) -+ { -+ machine_mode submode = mode; -+ if (vec_flags & VEC_STRUCT) -+ { -+ if (vec_flags & VEC_SVE_DATA) -+ submode = SVE_BYTE_MODE; -+ else if (vec_flags & VEC_PARTIAL) -+ submode = V8QImode; -+ else -+ submode = V16QImode; -+ } -+ save_location loc; -+ loc.reg = gen_rtx_REG (submode, regno); -+ if (vec_flags == VEC_SVE_PRED) -+ { -+ gcc_assert (PR_REGNUM_P (regno)); -+ loc.group = MEM_SVE_PRED; -+ } -+ else -+ { -+ gcc_assert (FP_REGNUM_P (regno)); -+ if (known_le (GET_MODE_SIZE (submode), 8)) -+ loc.group = GPR; -+ else if (known_eq (GET_MODE_SIZE (submode), 16)) -+ loc.group = MEM_128; -+ else -+ loc.group = MEM_SVE_DATA; -+ } -+ loc.index = m_group_count[loc.group]++; -+ m_save_locations.quick_push (loc); -+ } -+} -+ -+/* Record that the arguments to CALL_INSN need to be preserved around -+ the mode switch. 
*/ -+ -+void -+aarch64_sme_mode_switch_regs::add_call_args (rtx_call_insn *call_insn) -+{ -+ for (rtx node = CALL_INSN_FUNCTION_USAGE (call_insn); -+ node; node = XEXP (node, 1)) -+ { -+ rtx item = XEXP (node, 0); -+ if (GET_CODE (item) != USE) -+ continue; -+ item = XEXP (item, 0); -+ if (!REG_P (item)) -+ continue; -+ add_reg (GET_MODE (item), REGNO (item)); -+ } -+} -+ -+/* Record that the return value from CALL_INSN (if any) needs to be -+ preserved around the mode switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) -+{ -+ rtx pat = PATTERN (call_insn); -+ gcc_assert (GET_CODE (pat) == PARALLEL); -+ pat = XVECEXP (pat, 0, 0); -+ if (GET_CODE (pat) == CALL) -+ return; -+ rtx dest = SET_DEST (pat); -+ if (GET_CODE (dest) == PARALLEL) -+ for (int i = 0; i < XVECLEN (dest, 0); ++i) -+ { -+ rtx x = XVECEXP (dest, 0, i); -+ gcc_assert (GET_CODE (x) == EXPR_LIST); -+ rtx reg = XEXP (x, 0); -+ add_reg (GET_MODE (reg), REGNO (reg)); -+ } -+ else -+ add_reg (GET_MODE (dest), REGNO (dest)); -+} -+ -+/* Emit code to save registers before the mode switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_prologue () -+{ -+ emit_sve_sp_adjust (PROLOGUE); -+ emit_sve_pred_moves (PROLOGUE); -+ emit_sve_data_moves (PROLOGUE); -+ emit_mem_128_moves (PROLOGUE); -+ emit_gpr_moves (PROLOGUE); -+} -+ -+/* Emit code to restore registers after the mode switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_epilogue () -+{ -+ emit_gpr_moves (EPILOGUE); -+ emit_mem_128_moves (EPILOGUE); -+ emit_sve_pred_moves (EPILOGUE); -+ emit_sve_data_moves (EPILOGUE); -+ emit_sve_sp_adjust (EPILOGUE); -+} -+ -+/* The SVE predicate registers are stored below the SVE data registers, -+ with the predicate save area being padded to a data-register-sized -+ boundary. Return the size of this padded area as a whole number -+ of data register slots. 
*/ -+ -+unsigned int -+aarch64_sme_mode_switch_regs::sve_data_headroom () -+{ -+ return CEIL (m_group_count[MEM_SVE_PRED], 8); -+} -+ -+/* Return a memory reference of mode MODE to OFFSET bytes from the -+ stack pointer. */ -+ -+rtx -+aarch64_sme_mode_switch_regs::get_slot_mem (machine_mode mode, -+ poly_int64 offset) -+{ -+ rtx addr = plus_constant (Pmode, stack_pointer_rtx, offset); -+ return gen_rtx_MEM (mode, addr); -+} -+ -+/* Allocate or deallocate SIZE bytes of stack space: SEQ decides which. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_stack_adjust (sequence seq, -+ poly_int64 size) -+{ -+ if (seq == PROLOGUE) -+ size = -size; -+ emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, size))); -+} -+ -+/* Save or restore the register in LOC, whose slot is OFFSET bytes from -+ the stack pointer. SEQ chooses between saving and restoring. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_mem_move (sequence seq, -+ const save_location &loc, -+ poly_int64 offset) -+{ -+ rtx mem = get_slot_mem (GET_MODE (loc.reg), offset); -+ if (seq == PROLOGUE) -+ emit_move_insn (mem, loc.reg); -+ else -+ emit_move_insn (loc.reg, mem); -+} -+ -+/* Emit instructions to save or restore the GPR group. SEQ chooses between -+ saving and restoring. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_gpr_moves (sequence seq) -+{ -+ for (auto &loc : m_save_locations) -+ if (loc.group == GPR) -+ { -+ gcc_assert (loc.index < 8); -+ rtx gpr = gen_rtx_REG (GET_MODE (loc.reg), FIRST_GPR + loc.index); -+ if (seq == PROLOGUE) -+ emit_move_insn (gpr, loc.reg); -+ else -+ emit_move_insn (loc.reg, gpr); -+ } -+} -+ -+/* Emit instructions to save or restore the MEM_128 group. SEQ chooses -+ between saving and restoring. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_mem_128_moves (sequence seq) -+{ -+ HOST_WIDE_INT count = m_group_count[MEM_128]; -+ if (count == 0) -+ return; -+ -+ auto sp = stack_pointer_rtx; -+ auto sp_adjust = (seq == PROLOGUE ? 
-count : count) * 16; -+ -+ /* Pick a common mode that supports LDR & STR with pre/post-modification -+ and LDP & STP with pre/post-modification. */ -+ auto mode = TFmode; -+ -+ /* An instruction pattern that should be emitted at the end. */ -+ rtx last_pat = NULL_RTX; -+ -+ /* A previous MEM_128 location that hasn't been handled yet. */ -+ save_location *prev_loc = nullptr; -+ -+ /* Look for LDP/STPs and record any leftover LDR/STR in PREV_LOC. */ -+ for (auto &loc : m_save_locations) -+ if (loc.group == MEM_128) -+ { -+ if (!prev_loc) -+ { -+ prev_loc = &loc; -+ continue; -+ } -+ gcc_assert (loc.index == prev_loc->index + 1); -+ -+ /* The offset of the base of the save area from the current -+ stack pointer. */ -+ HOST_WIDE_INT bias = 0; -+ if (prev_loc->index == 0 && seq == PROLOGUE) -+ bias = sp_adjust; -+ -+ /* Get the two sets in the LDP/STP. */ -+ rtx ops[] = { -+ gen_rtx_REG (mode, REGNO (prev_loc->reg)), -+ get_slot_mem (mode, prev_loc->index * 16 + bias), -+ gen_rtx_REG (mode, REGNO (loc.reg)), -+ get_slot_mem (mode, loc.index * 16 + bias) -+ }; -+ unsigned int lhs = (seq == PROLOGUE); -+ rtx set1 = gen_rtx_SET (ops[lhs], ops[1 - lhs]); -+ rtx set2 = gen_rtx_SET (ops[lhs + 2], ops[3 - lhs]); -+ -+ /* Combine the sets with any stack allocation/deallocation. */ -+ rtvec vec; -+ if (prev_loc->index == 0) -+ { -+ rtx plus_sp = plus_constant (Pmode, sp, sp_adjust); -+ vec = gen_rtvec (3, gen_rtx_SET (sp, plus_sp), set1, set2); -+ } -+ else -+ vec = gen_rtvec (2, set1, set2); -+ rtx pat = gen_rtx_PARALLEL (VOIDmode, vec); -+ -+ /* Queue a deallocation to the end, otherwise emit the -+ instruction now. */ -+ if (seq == EPILOGUE && prev_loc->index == 0) -+ last_pat = pat; -+ else -+ emit_insn (pat); -+ prev_loc = nullptr; -+ } -+ -+ /* Handle any leftover LDR/STR. 
*/ -+ if (prev_loc) -+ { -+ rtx reg = gen_rtx_REG (mode, REGNO (prev_loc->reg)); -+ rtx addr; -+ if (prev_loc->index != 0) -+ addr = plus_constant (Pmode, sp, prev_loc->index * 16); -+ else if (seq == PROLOGUE) -+ { -+ rtx allocate = plus_constant (Pmode, sp, -count * 16); -+ addr = gen_rtx_PRE_MODIFY (Pmode, sp, allocate); -+ } -+ else -+ { -+ rtx deallocate = plus_constant (Pmode, sp, count * 16); -+ addr = gen_rtx_POST_MODIFY (Pmode, sp, deallocate); -+ } -+ rtx mem = gen_rtx_MEM (mode, addr); -+ if (seq == PROLOGUE) -+ emit_move_insn (mem, reg); -+ else -+ emit_move_insn (reg, mem); -+ } -+ -+ if (last_pat) -+ emit_insn (last_pat); -+} -+ -+/* Allocate or deallocate the stack space needed by the SVE groups. -+ SEQ chooses between allocating and deallocating. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_sve_sp_adjust (sequence seq) -+{ -+ if (unsigned int count = m_group_count[MEM_SVE_DATA] + sve_data_headroom ()) -+ emit_stack_adjust (seq, count * BYTES_PER_SVE_VECTOR); -+} -+ -+/* Save or restore the MEM_SVE_DATA group. SEQ chooses between saving -+ and restoring. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_sve_data_moves (sequence seq) -+{ -+ for (auto &loc : m_save_locations) -+ if (loc.group == MEM_SVE_DATA) -+ { -+ auto index = loc.index + sve_data_headroom (); -+ emit_mem_move (seq, loc, index * BYTES_PER_SVE_VECTOR); -+ } -+} -+ -+/* Save or restore the MEM_SVE_PRED group. SEQ chooses between saving -+ and restoring. */ -+ -+void -+aarch64_sme_mode_switch_regs::emit_sve_pred_moves (sequence seq) -+{ -+ for (auto &loc : m_save_locations) -+ if (loc.group == MEM_SVE_PRED) -+ emit_mem_move (seq, loc, loc.index * BYTES_PER_SVE_PRED); -+} -+ - /* Set DEST to (vec_series BASE STEP). */ - - static void -@@ -8211,6 +8683,40 @@ on_stack: - return; - } - -+/* Add the current argument register to the set of those that need -+ to be saved and restored around a change to PSTATE.SM. 
*/ -+ -+static void -+aarch64_record_sme_mode_switch_args (CUMULATIVE_ARGS *pcum) -+{ -+ subrtx_var_iterator::array_type array; -+ FOR_EACH_SUBRTX_VAR (iter, array, pcum->aapcs_reg, NONCONST) -+ { -+ rtx x = *iter; -+ if (REG_P (x) && (FP_REGNUM_P (REGNO (x)) || PR_REGNUM_P (REGNO (x)))) -+ { -+ unsigned int i = pcum->num_sme_mode_switch_args++; -+ gcc_assert (i < ARRAY_SIZE (pcum->sme_mode_switch_args)); -+ pcum->sme_mode_switch_args[i] = x; -+ } -+ } -+} -+ -+/* Return a parallel that contains all the registers that need to be -+ saved around a change to PSTATE.SM. Return const0_rtx if there is -+ no such mode switch, or if no registers need to be saved. */ -+ -+static rtx -+aarch64_finish_sme_mode_switch_args (CUMULATIVE_ARGS *pcum) -+{ -+ if (!pcum->num_sme_mode_switch_args) -+ return const0_rtx; -+ -+ auto argvec = gen_rtvec_v (pcum->num_sme_mode_switch_args, -+ pcum->sme_mode_switch_args); -+ return gen_rtx_PARALLEL (VOIDmode, argvec); -+} -+ - /* Implement TARGET_FUNCTION_ARG. */ - - static rtx -@@ -8222,7 +8728,13 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) - || pcum->pcs_variant == ARM_PCS_SVE); - - if (arg.end_marker_p ()) -- return aarch64_gen_callee_cookie (pcum->isa_mode, pcum->pcs_variant); -+ { -+ rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode, -+ pcum->pcs_variant); -+ rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum); -+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie, -+ sme_mode_switch_args)); -+ } - - aarch64_layout_arg (pcum_v, arg); - return pcum->aapcs_reg; -@@ -8257,6 +8769,7 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - pcum->aapcs_stack_words = 0; - pcum->aapcs_stack_size = 0; - pcum->silent_p = silent_p; -+ pcum->num_sme_mode_switch_args = 0; - - if (!silent_p - && !TARGET_FLOAT -@@ -8297,6 +8810,10 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v, - aarch64_layout_arg (pcum_v, arg); - gcc_assert ((pcum->aapcs_reg != NULL_RTX) - != 
(pcum->aapcs_stack_words != 0)); -+ if (pcum->aapcs_reg -+ && aarch64_call_switches_pstate_sm (pcum->isa_mode)) -+ aarch64_record_sme_mode_switch_args (pcum); -+ - pcum->aapcs_arg_processed = false; - pcum->aapcs_ncrn = pcum->aapcs_nextncrn; - pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; -@@ -8747,6 +9264,30 @@ aarch64_save_regs_above_locals_p () - return crtl->stack_protect_guard; - } - -+/* Return true if the current function needs to record the incoming -+ value of PSTATE.SM. */ -+static bool -+aarch64_need_old_pstate_sm () -+{ -+ /* Exit early if the incoming value of PSTATE.SM is known at -+ compile time. */ -+ if (aarch64_cfun_incoming_pstate_sm () != 0) -+ return false; -+ -+ if (cfun->machine->call_switches_pstate_sm) -+ for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) -+ if (auto *call = dyn_cast (insn)) -+ if (!SIBLING_CALL_P (call)) -+ { -+ /* Return true if there is a call to a non-streaming-compatible -+ function. */ -+ auto callee_isa_mode = aarch64_insn_callee_isa_mode (call); -+ if (aarch64_call_switches_pstate_sm (callee_isa_mode)) -+ return true; -+ } -+ return false; -+} -+ - /* Mark the registers that need to be saved by the callee and calculate - the size of the callee-saved registers area and frame record (both FP - and LR may be omitted). */ -@@ -8780,6 +9321,7 @@ aarch64_layout_frame (void) - /* First mark all the registers that really need to be saved... */ - for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++) - frame.reg_offset[regno] = SLOT_NOT_REQUIRED; -+ frame.old_svcr_offset = SLOT_NOT_REQUIRED; - - /* ... that includes the eh data registers (if needed)... 
*/ - if (crtl->calls_eh_return) -@@ -8932,6 +9474,21 @@ aarch64_layout_frame (void) - if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - allocate_gpr_slot (regno); - -+ if (aarch64_need_old_pstate_sm ()) -+ { -+ frame.old_svcr_offset = offset; -+ offset += UNITS_PER_WORD; -+ } -+ -+ /* If the current function changes the SVE vector length, ensure that the -+ old value of the DWARF VG register is saved and available in the CFI, -+ so that outer frames with VL-sized offsets can be processed correctly. */ -+ if (cfun->machine->call_switches_pstate_sm) -+ { -+ frame.reg_offset[VG_REGNUM] = offset; -+ offset += UNITS_PER_WORD; -+ } -+ - poly_int64 max_int_offset = offset; - offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - bool has_align_gap = maybe_ne (offset, max_int_offset); -@@ -8969,8 +9526,6 @@ aarch64_layout_frame (void) - if (push_regs.size () > 1) - frame.wb_push_candidate2 = push_regs[1]; - } -- else -- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)); - - /* With stack-clash, a register must be saved in non-leaf functions. 
- The saving of the bottommost register counts as an implicit probe, -@@ -9078,7 +9633,8 @@ aarch64_layout_frame (void) - frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs; - frame.final_adjust = frame.bytes_below_saved_regs; - } -- else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp) -+ else if (frame.wb_push_candidate1 != INVALID_REGNUM -+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp) - && const_above_fp < max_push_offset) - { - /* Frame with large area below the saved registers, or with SVE saves, -@@ -9459,7 +10015,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - - machine_mode mode = aarch64_reg_save_mode (regno); - rtx reg = gen_rtx_REG (mode, regno); -+ rtx move_src = reg; - offset = frame.reg_offset[regno] - bytes_below_sp; -+ if (regno == VG_REGNUM) -+ { -+ move_src = gen_rtx_REG (DImode, IP0_REGNUM); -+ emit_move_insn (move_src, gen_int_mode (aarch64_sve_vg, DImode)); -+ } - rtx base_rtx = stack_pointer_rtx; - poly_int64 sp_offset = offset; - -@@ -9467,7 +10029,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, - offset, ptrue); -- else if (GP_REGNUM_P (regno) -+ else if (GP_REGNUM_P (REGNO (reg)) - && (!offset.is_constant (&const_offset) || const_offset >= 512)) - { - poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp; -@@ -9490,6 +10052,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - - unsigned int regno2; - if (!aarch64_sve_mode_p (mode) -+ && reg == move_src - && i + 1 < regs.size () - && (regno2 = regs[i + 1], !skip_save_p (regno2)) - && known_eq (GET_MODE_SIZE (mode), -@@ -9521,17 +10084,24 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp, - } - else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) - { -- insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg)); -+ insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, move_src)); - 
need_cfa_note_p = true; - } - else if (aarch64_sve_mode_p (mode)) -- insn = emit_insn (gen_rtx_SET (mem, reg)); -+ insn = emit_insn (gen_rtx_SET (mem, move_src)); - else -- insn = emit_move_insn (mem, reg); -+ insn = emit_move_insn (mem, move_src); - - RTX_FRAME_RELATED_P (insn) = frame_related_p; - if (frame_related_p && need_cfa_note_p) - aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset); -+ else if (frame_related_p && move_src != reg) -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_SET (mem, reg)); -+ -+ /* Emit a fake instruction to indicate that the VG save slot has -+ been initialized. */ -+ if (regno == VG_REGNUM) -+ emit_insn (gen_aarch64_old_vg_saved (move_src, mem)); - } - } - -@@ -9754,6 +10324,10 @@ aarch64_get_separate_components (void) - bitmap_clear_bit (components, frame.hard_fp_save_and_probe); - } - -+ /* The VG save sequence needs a temporary GPR. Punt for now on trying -+ to find one. */ -+ bitmap_clear_bit (components, VG_REGNUM); -+ - return components; - } - -@@ -10249,6 +10823,47 @@ aarch64_epilogue_uses (int regno) - return 0; - } - -+/* The current function's frame has a save slot for the incoming state -+ of SVCR. Return a legitimate memory for the slot, based on the hard -+ frame pointer. */ -+ -+static rtx -+aarch64_old_svcr_mem () -+{ -+ gcc_assert (frame_pointer_needed -+ && known_ge (cfun->machine->frame.old_svcr_offset, 0)); -+ rtx base = hard_frame_pointer_rtx; -+ poly_int64 offset = (0 -+ /* hard fp -> bottom of frame. */ -+ - cfun->machine->frame.bytes_below_hard_fp -+ /* bottom of frame -> save slot. */ -+ + cfun->machine->frame.old_svcr_offset); -+ return gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); -+} -+ -+/* The current function's frame has a save slot for the incoming state -+ of SVCR. Load the slot into register REGNO and return the register. 
*/ -+ -+static rtx -+aarch64_read_old_svcr (unsigned int regno) -+{ -+ rtx svcr = gen_rtx_REG (DImode, regno); -+ emit_move_insn (svcr, aarch64_old_svcr_mem ()); -+ return svcr; -+} -+ -+/* Like the rtx version of aarch64_guard_switch_pstate_sm, but first -+ load the incoming value of SVCR from its save slot into temporary -+ register REGNO. */ -+ -+static rtx_insn * -+aarch64_guard_switch_pstate_sm (unsigned int regno, -+ aarch64_feature_flags local_mode) -+{ -+ rtx old_svcr = aarch64_read_old_svcr (regno); -+ return aarch64_guard_switch_pstate_sm (old_svcr, local_mode); -+} -+ - /* AArch64 stack frames generated by this compiler look like: - - +-------------------------------+ -@@ -10463,6 +11078,12 @@ aarch64_expand_prologue (void) - - aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true, - emit_frame_chain); -+ if (maybe_ge (frame.reg_offset[VG_REGNUM], 0)) -+ { -+ unsigned int saved_regs[] = { VG_REGNUM }; -+ aarch64_save_callee_saves (bytes_below_sp, saved_regs, true, -+ emit_frame_chain); -+ } - if (maybe_ne (sve_callee_adjust, 0)) - { - gcc_assert (!flag_stack_clash_protection -@@ -10484,6 +11105,40 @@ aarch64_expand_prologue (void) - !frame_pointer_needed, true); - if (emit_frame_chain && maybe_ne (final_adjust, 0)) - aarch64_emit_stack_tie (hard_frame_pointer_rtx); -+ -+ /* Save the incoming value of PSTATE.SM, if required. 
*/ -+ if (known_ge (frame.old_svcr_offset, 0)) -+ { -+ rtx mem = aarch64_old_svcr_mem (); -+ MEM_VOLATILE_P (mem) = 1; -+ if (TARGET_SME) -+ { -+ rtx reg = gen_rtx_REG (DImode, IP0_REGNUM); -+ emit_insn (gen_aarch64_read_svcr (reg)); -+ emit_move_insn (mem, reg); -+ } -+ else -+ { -+ rtx old_r0 = NULL_RTX, old_r1 = NULL_RTX; -+ auto &args = crtl->args.info; -+ if (args.aapcs_ncrn > 0) -+ { -+ old_r0 = gen_rtx_REG (DImode, PROBE_STACK_FIRST_REGNUM); -+ emit_move_insn (old_r0, gen_rtx_REG (DImode, R0_REGNUM)); -+ } -+ if (args.aapcs_ncrn > 1) -+ { -+ old_r1 = gen_rtx_REG (DImode, PROBE_STACK_SECOND_REGNUM); -+ emit_move_insn (old_r1, gen_rtx_REG (DImode, R1_REGNUM)); -+ } -+ emit_insn (gen_aarch64_get_sme_state ()); -+ emit_move_insn (mem, gen_rtx_REG (DImode, R0_REGNUM)); -+ if (old_r0) -+ emit_move_insn (gen_rtx_REG (DImode, R0_REGNUM), old_r0); -+ if (old_r1) -+ emit_move_insn (gen_rtx_REG (DImode, R1_REGNUM), old_r1); -+ } -+ } - } - - /* Return TRUE if we can use a simple_return insn. -@@ -11730,17 +12385,33 @@ aarch64_start_call_args (cumulative_args_t ca_v) - RESULT is the register in which the result is returned. It's NULL for - "call" and "sibcall". - MEM is the location of the function call. -- CALLEE_ABI is a const_int that gives the arm_pcs of the callee. -+ COOKIE is either: -+ - a const_int that gives the argument to the call's UNSPEC_CALLEE_ABI. -+ - a PARALLEL that contains such a const_int as its first element. -+ The second element is a PARALLEL that lists all the argument -+ registers that need to be saved and restored around a change -+ in PSTATE.SM, or const0_rtx if no such switch is needed. - SIBCALL indicates whether this function call is normal call or sibling call. - It will generate different pattern accordingly. 
*/ - - void --aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) -+aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) - { - rtx call, callee, tmp; - rtvec vec; - machine_mode mode; - -+ rtx callee_abi = cookie; -+ rtx sme_mode_switch_args = const0_rtx; -+ if (GET_CODE (cookie) == PARALLEL) -+ { -+ callee_abi = XVECEXP (cookie, 0, 0); -+ sme_mode_switch_args = XVECEXP (cookie, 0, 1); -+ } -+ -+ gcc_assert (CONST_INT_P (callee_abi)); -+ auto callee_isa_mode = aarch64_callee_isa_mode (callee_abi); -+ - gcc_assert (MEM_P (mem)); - callee = XEXP (mem, 0); - mode = GET_MODE (callee); -@@ -11765,26 +12436,75 @@ aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) - else - tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); - -- gcc_assert (CONST_INT_P (callee_abi)); - callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), - UNSPEC_CALLEE_ABI); - - vec = gen_rtvec (3, call, callee_abi, tmp); - call = gen_rtx_PARALLEL (VOIDmode, vec); - -- aarch64_emit_call_insn (call); -+ auto call_insn = aarch64_emit_call_insn (call); -+ -+ /* Check whether the call requires a change to PSTATE.SM. We can't -+ emit the instructions to change PSTATE.SM yet, since they involve -+ a change in vector length and a change in instruction set, which -+ cannot be represented in RTL. -+ -+ For now, just record which registers will be clobbered and used -+ by the changes to PSTATE.SM. 
*/ -+ if (!sibcall && aarch64_call_switches_pstate_sm (callee_isa_mode)) -+ { -+ aarch64_sme_mode_switch_regs args_switch; -+ if (sme_mode_switch_args != const0_rtx) -+ { -+ unsigned int num_args = XVECLEN (sme_mode_switch_args, 0); -+ for (unsigned int i = 0; i < num_args; ++i) -+ { -+ rtx x = XVECEXP (sme_mode_switch_args, 0, i); -+ args_switch.add_reg (GET_MODE (x), REGNO (x)); -+ } -+ } -+ -+ aarch64_sme_mode_switch_regs result_switch; -+ if (result) -+ result_switch.add_call_result (call_insn); -+ -+ unsigned int num_gprs = MAX (args_switch.num_gprs (), -+ result_switch.num_gprs ()); -+ for (unsigned int i = 0; i < num_gprs; ++i) -+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (DImode, args_switch.FIRST_GPR + i)); -+ -+ for (int regno = V0_REGNUM; regno < V0_REGNUM + 32; regno += 4) -+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (V4x16QImode, regno)); -+ -+ for (int regno = P0_REGNUM; regno < P0_REGNUM + 16; regno += 1) -+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (VNx16BImode, regno)); -+ -+ /* Ensure that the VG save slot has been initialized. Also emit -+ an instruction to model the effect of the temporary clobber -+ of VG, so that the prologue/epilogue pass sees the need to -+ save the old value. */ -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (DImode, VG_REGNUM)); -+ emit_insn_before (gen_aarch64_update_vg (), call_insn); -+ -+ cfun->machine->call_switches_pstate_sm = true; -+ } - } - - /* Emit call insn with PAT and do aarch64-specific handling. 
*/ - --void -+rtx_call_insn * - aarch64_emit_call_insn (rtx pat) - { -- rtx insn = emit_call_insn (pat); -+ auto insn = emit_call_insn (pat); - - rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn); - clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM)); - clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM)); -+ return as_a (insn); - } - - machine_mode -@@ -13069,6 +13789,16 @@ aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1, - return false; - } - -+/* Implement TARGET_FRAME_POINTER_REQUIRED. */ -+ -+static bool -+aarch64_frame_pointer_required () -+{ -+ /* If the function needs to record the incoming value of PSTATE.SM, -+ make sure that the slot is accessible from the frame pointer. */ -+ return aarch64_need_old_pstate_sm (); -+} -+ - static bool - aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) - { -@@ -20607,7 +21337,8 @@ aarch64_conditional_register_usage (void) - call_used_regs[i] = 1; - } - -- /* Only allow the FFR and FFRT to be accessed via special patterns. */ -+ /* Only allow these registers to be accessed via special patterns. */ -+ CLEAR_HARD_REG_BIT (operand_reg_set, VG_REGNUM); - CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); - CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); - -@@ -27903,6 +28634,123 @@ aarch64_indirect_call_asm (rtx addr) - return ""; - } - -+/* If CALL involves a change in PSTATE.SM, emit the instructions needed -+ to switch to the new mode and the instructions needed to restore the -+ original mode. Return true if something changed. */ -+static bool -+aarch64_switch_pstate_sm_for_call (rtx_call_insn *call) -+{ -+ /* Mode switches for sibling calls are handled via the epilogue. */ -+ if (SIBLING_CALL_P (call)) -+ return false; -+ -+ auto callee_isa_mode = aarch64_insn_callee_isa_mode (call); -+ if (!aarch64_call_switches_pstate_sm (callee_isa_mode)) -+ return false; -+ -+ /* Switch mode before the call, preserving any argument registers -+ across the switch. 
*/ -+ start_sequence (); -+ rtx_insn *args_guard_label = nullptr; -+ if (TARGET_STREAMING_COMPATIBLE) -+ args_guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, -+ callee_isa_mode); -+ aarch64_sme_mode_switch_regs args_switch; -+ args_switch.add_call_args (call); -+ args_switch.emit_prologue (); -+ aarch64_switch_pstate_sm (AARCH64_ISA_MODE, callee_isa_mode); -+ args_switch.emit_epilogue (); -+ if (args_guard_label) -+ emit_label (args_guard_label); -+ auto args_seq = get_insns (); -+ end_sequence (); -+ emit_insn_before (args_seq, call); -+ -+ if (find_reg_note (call, REG_NORETURN, NULL_RTX)) -+ return true; -+ -+ /* Switch mode after the call, preserving any return registers across -+ the switch. */ -+ start_sequence (); -+ rtx_insn *return_guard_label = nullptr; -+ if (TARGET_STREAMING_COMPATIBLE) -+ return_guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, -+ callee_isa_mode); -+ aarch64_sme_mode_switch_regs return_switch; -+ return_switch.add_call_result (call); -+ return_switch.emit_prologue (); -+ aarch64_switch_pstate_sm (callee_isa_mode, AARCH64_ISA_MODE); -+ return_switch.emit_epilogue (); -+ if (return_guard_label) -+ emit_label (return_guard_label); -+ auto result_seq = get_insns (); -+ end_sequence (); -+ emit_insn_after (result_seq, call); -+ return true; -+} -+ -+namespace { -+ -+const pass_data pass_data_switch_pstate_sm = -+{ -+ RTL_PASS, // type -+ "smstarts", // name -+ OPTGROUP_NONE, // optinfo_flags -+ TV_NONE, // tv_id -+ 0, // properties_required -+ 0, // properties_provided -+ 0, // properties_destroyed -+ 0, // todo_flags_start -+ TODO_df_finish, // todo_flags_finish -+}; -+ -+class pass_switch_pstate_sm : public rtl_opt_pass -+{ -+public: -+ pass_switch_pstate_sm (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_switch_pstate_sm, ctxt) -+ {} -+ -+ // opt_pass methods: -+ bool gate (function *) override final; -+ unsigned int execute (function *) override final; -+}; -+ -+bool -+pass_switch_pstate_sm::gate (function *) -+{ -+ 
return cfun->machine->call_switches_pstate_sm; -+} -+ -+/* Emit any instructions needed to switch PSTATE.SM. */ -+unsigned int -+pass_switch_pstate_sm::execute (function *fn) -+{ -+ basic_block bb; -+ -+ auto_sbitmap blocks (last_basic_block_for_fn (cfun)); -+ bitmap_clear (blocks); -+ FOR_EACH_BB_FN (bb, fn) -+ { -+ rtx_insn *insn; -+ FOR_BB_INSNS (bb, insn) -+ if (auto *call = dyn_cast (insn)) -+ if (aarch64_switch_pstate_sm_for_call (call)) -+ bitmap_set_bit (blocks, bb->index); -+ } -+ find_many_sub_basic_blocks (blocks); -+ clear_aux_for_blocks (); -+ return 0; -+} -+ -+} -+ -+rtl_opt_pass * -+make_pass_switch_pstate_sm (gcc::context *ctxt) -+{ -+ return new pass_switch_pstate_sm (ctxt); -+} -+ - /* Target-specific selftests. */ - - #if CHECKING_P -@@ -28176,6 +29024,9 @@ aarch64_get_v16qi_mode () - #undef TARGET_CALLEE_COPIES - #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false - -+#undef TARGET_FRAME_POINTER_REQUIRED -+#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required -+ - #undef TARGET_CAN_ELIMINATE - #define TARGET_CAN_ELIMINATE aarch64_can_eliminate - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 1591cde8b..6bfe55968 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -255,6 +255,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - /* The current function is a normal non-streaming function. */ - #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) - -+/* The current function has a streaming-compatible body. */ -+#define TARGET_STREAMING_COMPATIBLE \ -+ ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) -+ - /* Crypto is an optional extension to AdvSIMD. 
*/ - #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) - -@@ -461,7 +465,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - 0, 0, 0, 0, 0, 0, 0, 0, /* V8 - V15 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* V16 - V23 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* V24 - V31 */ \ -- 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ -+ 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ - 1, 1 /* FFR and FFRT */ \ -@@ -802,6 +806,13 @@ struct GTY (()) aarch64_frame - vec *saved_fprs; - vec *saved_prs; - -+ /* The offset from the base of the frame of a 64-bit slot whose low -+ bit contains the incoming value of PSTATE.SM. This slot must be -+ within reach of the hard frame pointer. -+ -+ The offset is -1 if such a slot isn't needed. */ -+ poly_int64 old_svcr_offset; -+ - /* The number of extra stack bytes taken up by register varargs. - This area is allocated by the callee at the very top of the - frame. This value is rounded up to a multiple of -@@ -910,6 +921,12 @@ typedef struct GTY (()) machine_function - /* One entry for each general purpose register. */ - rtx call_via[SP_REGNUM]; - bool label_is_assembled; -+ -+ /* True if we've expanded at least one call to a function that changes -+ PSTATE.SM. This should only be used for saving compile time: false -+ guarantees that no such mode switch exists. */ -+ bool call_switches_pstate_sm; -+ - /* A set of all decls that have been passed to a vld1 intrinsic in the - current function. This is used to help guide the vector cost model. */ - hash_set *vector_load_decls; -@@ -978,6 +995,12 @@ typedef struct - stack arg area so far. */ - bool silent_p; /* True if we should act silently, rather than - raise an error for invalid calls. */ -+ -+ /* A list of registers that need to be saved and restored around a -+ change to PSTATE.SM. An auto_vec would be more convenient, but those -+ can't be copied. 
*/ -+ unsigned int num_sme_mode_switch_args; -+ rtx sme_mode_switch_args[12]; - } CUMULATIVE_ARGS; - #endif - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 2ce123255..bb867de74 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -970,7 +970,7 @@ - operands[1]); - }) - --(define_insn "*tb1" -+(define_insn "@aarch64_tb" - [(set (pc) (if_then_else - (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r") - (const_int 1) -@@ -1057,7 +1057,7 @@ - [(parallel - [(call (match_operand 0 "memory_operand") - (match_operand 1 "general_operand")) -- (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (unspec:DI [(match_operand 2)] UNSPEC_CALLEE_ABI) - (clobber (reg:DI LR_REGNUM))])] - "" - " -@@ -1083,7 +1083,7 @@ - [(set (match_operand 0 "") - (call (match_operand 1 "memory_operand") - (match_operand 2 "general_operand"))) -- (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (unspec:DI [(match_operand 3)] UNSPEC_CALLEE_ABI) - (clobber (reg:DI LR_REGNUM))])] - "" - " -@@ -1110,7 +1110,7 @@ - [(parallel - [(call (match_operand 0 "memory_operand") - (match_operand 1 "general_operand")) -- (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (unspec:DI [(match_operand 2)] UNSPEC_CALLEE_ABI) - (return)])] - "" - { -@@ -1124,7 +1124,7 @@ - [(set (match_operand 0 "") - (call (match_operand 1 "memory_operand") - (match_operand 2 "general_operand"))) -- (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (unspec:DI [(match_operand 3)] UNSPEC_CALLEE_ABI) - (return)])] - "" - { -@@ -7747,3 +7747,6 @@ - - ;; SVE2. 
- (include "aarch64-sve2.md") -+ -+;; SME and extensions -+(include "aarch64-sme.md") -diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 -index 10cd8f093..49731ba92 100644 ---- a/gcc/config/aarch64/t-aarch64 -+++ b/gcc/config/aarch64/t-aarch64 -@@ -186,9 +186,12 @@ MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) - insn-conditions.md: s-check-sve-md - s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \ - $(srcdir)/config/aarch64/aarch64-sve.md \ -- $(srcdir)/config/aarch64/aarch64-sve2.md -+ $(srcdir)/config/aarch64/aarch64-sve2.md \ -+ $(srcdir)/config/aarch64/aarch64-sme.md - $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ - $(srcdir)/config/aarch64/aarch64-sve.md - $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ - $(srcdir)/config/aarch64/aarch64-sve2.md -+ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ -+ $(srcdir)/config/aarch64/aarch64-sme.md - $(STAMP) s-check-sve-md -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c -new file mode 100644 -index 000000000..a2de55773 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c -@@ -0,0 +1,233 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void ns_callee (); -+ void s_callee () [[arm::streaming]]; -+ void sc_callee () [[arm::streaming_compatible]]; -+ -+void ns_callee_stack (int, int, int, int, int, int, int, int, int); -+ -+struct callbacks { -+ void (*ns_ptr) (); -+ void (*s_ptr) () [[arm::streaming]]; -+ void (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+/* -+** n_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-96\]! 
-+** cntd x16 -+** str x16, \[sp, #?16\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mov \1, x0 -+** bl ns_callee -+** smstart sm -+** bl s_callee -+** smstop sm -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** blr \2 -+** ldr (x[0-9]+), \[\1, #?8\] -+** smstart sm -+** blr \3 -+** smstop sm -+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x30, \1, \[sp\], #?96 -+** ret -+*/ -+void -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+/* -+** s_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-96\]! -+** cntd x16 -+** str x16, \[sp, #?16\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mov \1, x0 -+** smstop sm -+** bl ns_callee -+** smstart sm -+** bl s_callee -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** smstop sm -+** blr \2 -+** smstart sm -+** ldr (x[0-9]+), \[\1, #?8\] -+** blr \3 -+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x30, \1, \[sp\], #?96 -+** ret -+*/ -+void -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+/* -+** sc_caller_sme: -+** stp x29, x30, \[sp, #?-96\]! 
-+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstop sm -+** bl ns_callee -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstart sm -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** smstart sm -+** bl s_callee -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** smstop sm -+** bl sc_callee -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x29, x30, \[sp\], #?96 -+** ret -+*/ -+void -+sc_caller_sme () [[arm::streaming_compatible]] -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+} -+ -+#pragma GCC target "+nosme" -+ -+/* -+** sc_caller: -+** stp x29, x30, \[sp, #?-96\]! -+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** bl __arm_sme_state -+** str x0, \[x29, #?16\] -+** ... -+** bl sc_callee -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x29, x30, \[sp\], #?96 -+** ret -+*/ -+void -+sc_caller () [[arm::streaming_compatible]] -+{ -+ ns_callee (); -+ sc_callee (); -+} -+ -+/* -+** sc_caller_x0: -+** ... -+** mov x10, x0 -+** bl __arm_sme_state -+** ... -+** str wzr, \[x10\] -+** ... -+*/ -+void -+sc_caller_x0 (int *ptr) [[arm::streaming_compatible]] -+{ -+ *ptr = 0; -+ ns_callee (); -+ sc_callee (); -+} -+ -+/* -+** sc_caller_x1: -+** ... -+** mov x10, x0 -+** mov x11, x1 -+** bl __arm_sme_state -+** ... -+** str w11, \[x10\] -+** ... 
-+*/ -+void -+sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]] -+{ -+ *ptr = a; -+ ns_callee (); -+ sc_callee (); -+} -+ -+/* -+** sc_caller_stack: -+** sub sp, sp, #112 -+** stp x29, x30, \[sp, #?16\] -+** add x29, sp, #?16 -+** ... -+** stp d8, d9, \[sp, #?48\] -+** ... -+** bl __arm_sme_state -+** str x0, \[x29, #?16\] -+** ... -+** bl ns_callee_stack -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstart sm -+** ... -+*/ -+void -+sc_caller_stack () [[arm::streaming_compatible]] -+{ -+ ns_callee_stack (0, 0, 0, 0, 0, 0, 0, 0, 0); -+} -+ -+/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ -+/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -80\n} } } */ -+/* { dg-final { scan-assembler {sc_caller_sme:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ -+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -72\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c -new file mode 100644 -index 000000000..49c5e4a6a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_10.c -@@ -0,0 +1,37 @@ -+// { dg-options "" } -+ -+#pragma GCC target "+nosme" -+ -+void ns_callee (); -+ void s_callee () [[arm::streaming]]; -+ void sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ void (*ns_ptr) (); -+ void (*s_ptr) () [[arm::streaming]]; -+ void (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+void -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ s_callee (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } -+ c->sc_ptr (); -+} -+ -+void -+sc_caller_sme (struct callbacks *c) [[arm::streaming_compatible]] -+{ -+ ns_callee (); -+ s_callee (); // { dg-error "calling a streaming function requires the ISA 
extension 'sme'" } -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); // { dg-error "calling a streaming function requires the ISA extension 'sme'" } -+ c->sc_ptr (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c -new file mode 100644 -index 000000000..890fcbc5b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_2.c -@@ -0,0 +1,43 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+ -+void ns_callee (); -+ void s_callee () [[arm::streaming]]; -+ void sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ void (*ns_ptr) (); -+ void (*s_ptr) () [[arm::streaming]]; -+ void (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+void -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->sc_ptr (); -+} -+ -+void -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ s_callee (); -+ sc_callee (); -+ -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+void -+sc_caller (struct callbacks *c) [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+ -+ c->sc_ptr (); -+} -+ -+// { dg-final { scan-assembler-not {[dpqz][0-9]+,} } } -+// { dg-final { scan-assembler-not {smstart\tsm} } } -+// { dg-final { scan-assembler-not {smstop\tsm} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c -new file mode 100644 -index 000000000..ed999d085 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c -@@ -0,0 +1,166 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+__attribute__((aarch64_vector_pcs)) void ns_callee (); -+__attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]]; -+__attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ 
__attribute__((aarch64_vector_pcs)) void (*ns_ptr) (); -+ __attribute__((aarch64_vector_pcs)) void (*s_ptr) () [[arm::streaming]]; -+ __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+/* -+** n_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-288\]! -+** cntd x16 -+** str x16, \[sp, #?16\] -+** stp q8, q9, \[sp, #?32\] -+** stp q10, q11, \[sp, #?64\] -+** stp q12, q13, \[sp, #?96\] -+** stp q14, q15, \[sp, #?128\] -+** stp q16, q17, \[sp, #?160\] -+** stp q18, q19, \[sp, #?192\] -+** stp q20, q21, \[sp, #?224\] -+** stp q22, q23, \[sp, #?256\] -+** mov \1, x0 -+** bl ns_callee -+** smstart sm -+** bl s_callee -+** smstop sm -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** blr \2 -+** ldr (x[0-9]+), \[\1, #?8\] -+** smstart sm -+** blr \3 -+** smstop sm -+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldp q8, q9, \[sp, #?32\] -+** ldp q10, q11, \[sp, #?64\] -+** ldp q12, q13, \[sp, #?96\] -+** ldp q14, q15, \[sp, #?128\] -+** ldp q16, q17, \[sp, #?160\] -+** ldp q18, q19, \[sp, #?192\] -+** ldp q20, q21, \[sp, #?224\] -+** ldp q22, q23, \[sp, #?256\] -+** ldp x30, \1, \[sp\], #?288 -+** ret -+*/ -+void __attribute__((aarch64_vector_pcs)) -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+/* -+** s_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-288\]! 
-+** cntd x16 -+** str x16, \[sp, #?16\] -+** stp q8, q9, \[sp, #?32\] -+** stp q10, q11, \[sp, #?64\] -+** stp q12, q13, \[sp, #?96\] -+** stp q14, q15, \[sp, #?128\] -+** stp q16, q17, \[sp, #?160\] -+** stp q18, q19, \[sp, #?192\] -+** stp q20, q21, \[sp, #?224\] -+** stp q22, q23, \[sp, #?256\] -+** mov \1, x0 -+** smstop sm -+** bl ns_callee -+** smstart sm -+** bl s_callee -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** smstop sm -+** blr \2 -+** smstart sm -+** ldr (x[0-9]+), \[\1, #?8\] -+** blr \3 -+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldp q8, q9, \[sp, #?32\] -+** ldp q10, q11, \[sp, #?64\] -+** ldp q12, q13, \[sp, #?96\] -+** ldp q14, q15, \[sp, #?128\] -+** ldp q16, q17, \[sp, #?160\] -+** ldp q18, q19, \[sp, #?192\] -+** ldp q20, q21, \[sp, #?224\] -+** ldp q22, q23, \[sp, #?256\] -+** ldp x30, \1, \[sp\], #?288 -+** ret -+*/ -+void __attribute__((aarch64_vector_pcs)) -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+/* -+** sc_caller: -+** stp x29, x30, \[sp, #?-288\]! 
-+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp q8, q9, \[sp, #?32\] -+** stp q10, q11, \[sp, #?64\] -+** stp q12, q13, \[sp, #?96\] -+** stp q14, q15, \[sp, #?128\] -+** stp q16, q17, \[sp, #?160\] -+** stp q18, q19, \[sp, #?192\] -+** stp q20, q21, \[sp, #?224\] -+** stp q22, q23, \[sp, #?256\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstop sm -+** bl ns_callee -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstart sm -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** smstart sm -+** bl s_callee -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** smstop sm -+** bl sc_callee -+** ldp q8, q9, \[sp, #?32\] -+** ldp q10, q11, \[sp, #?64\] -+** ldp q12, q13, \[sp, #?96\] -+** ldp q14, q15, \[sp, #?128\] -+** ldp q16, q17, \[sp, #?160\] -+** ldp q18, q19, \[sp, #?192\] -+** ldp q20, q21, \[sp, #?224\] -+** ldp q22, q23, \[sp, #?256\] -+** ldp x29, x30, \[sp\], #?288 -+** ret -+*/ -+void __attribute__((aarch64_vector_pcs)) -+sc_caller () [[arm::streaming_compatible]] -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+} -+ -+/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ -+/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -272\n} } } */ -+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -264\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c -new file mode 100644 -index 000000000..f93a67f97 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_4.c -@@ -0,0 +1,43 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+ -+__attribute__((aarch64_vector_pcs)) void ns_callee (); -+__attribute__((aarch64_vector_pcs)) void s_callee () [[arm::streaming]]; -+__attribute__((aarch64_vector_pcs)) void sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ 
__attribute__((aarch64_vector_pcs)) void (*ns_ptr) (); -+ __attribute__((aarch64_vector_pcs)) void (*s_ptr) () [[arm::streaming]]; -+ __attribute__((aarch64_vector_pcs)) void (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+void __attribute__((aarch64_vector_pcs)) -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->sc_ptr (); -+} -+ -+void __attribute__((aarch64_vector_pcs)) -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ s_callee (); -+ sc_callee (); -+ -+ c->s_ptr (); -+ c->sc_ptr (); -+} -+ -+void __attribute__((aarch64_vector_pcs)) -+sc_caller (struct callbacks *c) [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+ -+ c->sc_ptr (); -+} -+ -+// { dg-final { scan-assembler-not {[dpqz][0-9]+,} } } -+// { dg-final { scan-assembler-not {smstart\tsm} } } -+// { dg-final { scan-assembler-not {smstop\tsm} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -new file mode 100644 -index 000000000..be9b5cc04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -@@ -0,0 +1,318 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+ -+svbool_t ns_callee (); -+ svbool_t s_callee () [[arm::streaming]]; -+ svbool_t sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ svbool_t (*ns_ptr) (); -+ svbool_t (*s_ptr) () [[arm::streaming]]; -+ svbool_t (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+/* -+** n_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! 
-+** cntd x16 -+** str x16, \[sp, #?16\] -+** addvl sp, sp, #-18 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str p12, \[sp, #8, mul vl\] -+** str p13, \[sp, #9, mul vl\] -+** str p14, \[sp, #10, mul vl\] -+** str p15, \[sp, #11, mul vl\] -+** str z8, \[sp, #2, mul vl\] -+** str z9, \[sp, #3, mul vl\] -+** str z10, \[sp, #4, mul vl\] -+** str z11, \[sp, #5, mul vl\] -+** str z12, \[sp, #6, mul vl\] -+** str z13, \[sp, #7, mul vl\] -+** str z14, \[sp, #8, mul vl\] -+** str z15, \[sp, #9, mul vl\] -+** str z16, \[sp, #10, mul vl\] -+** str z17, \[sp, #11, mul vl\] -+** str z18, \[sp, #12, mul vl\] -+** str z19, \[sp, #13, mul vl\] -+** str z20, \[sp, #14, mul vl\] -+** str z21, \[sp, #15, mul vl\] -+** str z22, \[sp, #16, mul vl\] -+** str z23, \[sp, #17, mul vl\] -+** mov \1, x0 -+** bl ns_callee -+** smstart sm -+** bl s_callee -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** blr \2 -+** ldr (x[0-9]+), \[\1, #?8\] -+** smstart sm -+** blr \3 -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldr z8, \[sp, #2, mul vl\] -+** ldr z9, \[sp, #3, mul vl\] -+** ldr z10, \[sp, #4, mul vl\] -+** ldr z11, \[sp, #5, mul vl\] -+** ldr z12, \[sp, #6, mul vl\] -+** ldr z13, \[sp, #7, mul vl\] -+** ldr z14, \[sp, #8, mul vl\] -+** ldr z15, \[sp, #9, mul vl\] -+** ldr z16, \[sp, #10, mul vl\] -+** ldr z17, \[sp, #11, mul vl\] -+** ldr z18, \[sp, #12, mul vl\] -+** ldr z19, \[sp, #13, mul vl\] -+** ldr z20, \[sp, #14, mul vl\] -+** ldr z21, \[sp, #15, mul vl\] -+** ldr z22, \[sp, #16, mul vl\] -+** ldr z23, \[sp, #17, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** 
ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** ldr p12, \[sp, #8, mul vl\] -+** ldr p13, \[sp, #9, mul vl\] -+** ldr p14, \[sp, #10, mul vl\] -+** ldr p15, \[sp, #11, mul vl\] -+** addvl sp, sp, #18 -+** ldp x30, \1, \[sp\], #?32 -+** ret -+*/ -+svbool_t -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ return c->sc_ptr (); -+} -+ -+/* -+** s_caller: { target lp64 } -+** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! -+** cntd x16 -+** str x16, \[sp, #?16\] -+** addvl sp, sp, #-18 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str p12, \[sp, #8, mul vl\] -+** str p13, \[sp, #9, mul vl\] -+** str p14, \[sp, #10, mul vl\] -+** str p15, \[sp, #11, mul vl\] -+** str z8, \[sp, #2, mul vl\] -+** str z9, \[sp, #3, mul vl\] -+** str z10, \[sp, #4, mul vl\] -+** str z11, \[sp, #5, mul vl\] -+** str z12, \[sp, #6, mul vl\] -+** str z13, \[sp, #7, mul vl\] -+** str z14, \[sp, #8, mul vl\] -+** str z15, \[sp, #9, mul vl\] -+** str z16, \[sp, #10, mul vl\] -+** str z17, \[sp, #11, mul vl\] -+** str z18, \[sp, #12, mul vl\] -+** str z19, \[sp, #13, mul vl\] -+** str z20, \[sp, #14, mul vl\] -+** str z21, \[sp, #15, mul vl\] -+** str z22, \[sp, #16, mul vl\] -+** str z23, \[sp, #17, mul vl\] -+** mov \1, x0 -+** smstop sm -+** bl ns_callee -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** bl s_callee -+** bl sc_callee -+** ldr (x[0-9]+), \[\1\] -+** smstop sm -+** blr \2 -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** ldr (x[0-9]+), \[\1, #?8\] -+** blr \3 
-+** ldr (x[0-9]+), \[\1, #?16\] -+** blr \4 -+** ldr z8, \[sp, #2, mul vl\] -+** ldr z9, \[sp, #3, mul vl\] -+** ldr z10, \[sp, #4, mul vl\] -+** ldr z11, \[sp, #5, mul vl\] -+** ldr z12, \[sp, #6, mul vl\] -+** ldr z13, \[sp, #7, mul vl\] -+** ldr z14, \[sp, #8, mul vl\] -+** ldr z15, \[sp, #9, mul vl\] -+** ldr z16, \[sp, #10, mul vl\] -+** ldr z17, \[sp, #11, mul vl\] -+** ldr z18, \[sp, #12, mul vl\] -+** ldr z19, \[sp, #13, mul vl\] -+** ldr z20, \[sp, #14, mul vl\] -+** ldr z21, \[sp, #15, mul vl\] -+** ldr z22, \[sp, #16, mul vl\] -+** ldr z23, \[sp, #17, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** ldr p12, \[sp, #8, mul vl\] -+** ldr p13, \[sp, #9, mul vl\] -+** ldr p14, \[sp, #10, mul vl\] -+** ldr p15, \[sp, #11, mul vl\] -+** addvl sp, sp, #18 -+** ldp x30, \1, \[sp\], #?32 -+** ret -+*/ -+svbool_t -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ ns_callee (); -+ s_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ c->s_ptr (); -+ return c->sc_ptr (); -+} -+ -+/* -+** sc_caller: -+** stp x29, x30, \[sp, #?-32\]! 
-+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** addvl sp, sp, #-18 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str p12, \[sp, #8, mul vl\] -+** str p13, \[sp, #9, mul vl\] -+** str p14, \[sp, #10, mul vl\] -+** str p15, \[sp, #11, mul vl\] -+** str z8, \[sp, #2, mul vl\] -+** str z9, \[sp, #3, mul vl\] -+** str z10, \[sp, #4, mul vl\] -+** str z11, \[sp, #5, mul vl\] -+** str z12, \[sp, #6, mul vl\] -+** str z13, \[sp, #7, mul vl\] -+** str z14, \[sp, #8, mul vl\] -+** str z15, \[sp, #9, mul vl\] -+** str z16, \[sp, #10, mul vl\] -+** str z17, \[sp, #11, mul vl\] -+** str z18, \[sp, #12, mul vl\] -+** str z19, \[sp, #13, mul vl\] -+** str z20, \[sp, #14, mul vl\] -+** str z21, \[sp, #15, mul vl\] -+** str z22, \[sp, #16, mul vl\] -+** str z23, \[sp, #17, mul vl\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** smstop sm -+** bl ns_callee -+** ldr x16, \[x29, #?16\] -+** tbz x16, 0, .* -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** smstart sm -+** bl s_callee -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, .* -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** bl sc_callee -+** ldr z8, \[sp, #2, mul vl\] -+** ldr z9, \[sp, #3, mul vl\] -+** ldr z10, \[sp, #4, mul vl\] -+** ldr z11, \[sp, #5, mul vl\] -+** ldr z12, \[sp, #6, mul vl\] -+** ldr z13, \[sp, #7, mul vl\] -+** ldr z14, \[sp, #8, mul vl\] -+** ldr z15, \[sp, #9, mul vl\] -+** ldr z16, \[sp, #10, mul vl\] -+** ldr z17, \[sp, #11, mul vl\] -+** ldr z18, \[sp, #12, mul vl\] -+** ldr z19, \[sp, #13, mul vl\] -+** ldr z20, \[sp, #14, mul vl\] -+** ldr z21, \[sp, #15, mul vl\] -+** ldr 
z22, \[sp, #16, mul vl\] -+** ldr z23, \[sp, #17, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** ldr p12, \[sp, #8, mul vl\] -+** ldr p13, \[sp, #9, mul vl\] -+** ldr p14, \[sp, #10, mul vl\] -+** ldr p15, \[sp, #11, mul vl\] -+** addvl sp, sp, #18 -+** ldp x29, x30, \[sp\], #?32 -+** ret -+*/ -+svbool_t -+sc_caller () [[arm::streaming_compatible]] -+{ -+ ns_callee (); -+ s_callee (); -+ return sc_callee (); -+} -+ -+/* { dg-final { scan-assembler {n_caller:(?:(?!ret).)*\.cfi_offset 46, -16\n} } } */ -+/* { dg-final { scan-assembler {s_caller:(?:(?!ret).)*\.cfi_offset 46, -16\n} } } */ -+/* { dg-final { scan-assembler {sc_caller:(?:(?!ret).)*\.cfi_offset 46, -8\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c -new file mode 100644 -index 000000000..0f6bc4f6c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_6.c -@@ -0,0 +1,45 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+ -+#include -+ -+svbool_t ns_callee (); -+ svbool_t s_callee () [[arm::streaming]]; -+ svbool_t sc_callee () [[arm::streaming_compatible]]; -+ -+struct callbacks { -+ svbool_t (*ns_ptr) (); -+ svbool_t (*s_ptr) () [[arm::streaming]]; -+ svbool_t (*sc_ptr) () [[arm::streaming_compatible]]; -+}; -+ -+svbool_t -+n_caller (struct callbacks *c) -+{ -+ ns_callee (); -+ sc_callee (); -+ -+ c->ns_ptr (); -+ return c->sc_ptr (); -+} -+ -+svbool_t -+s_caller (struct callbacks *c) [[arm::streaming]] -+{ -+ s_callee (); -+ sc_callee (); -+ -+ c->s_ptr (); -+ return c->sc_ptr (); -+} -+ -+svbool_t -+sc_caller (struct callbacks *c) [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+ -+ return c->sc_ptr (); -+} -+ -+// { dg-final { scan-assembler-not 
{[dpqz][0-9]+,} } } -+// { dg-final { scan-assembler-not {smstart\tsm} } } -+// { dg-final { scan-assembler-not {smstop\tsm} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c -new file mode 100644 -index 000000000..6482a489f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_7.c -@@ -0,0 +1,516 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+#include -+ -+double produce_d0 (); -+void consume_d0 (double); -+ -+/* -+** test_d0: -+** ... -+** smstop sm -+** bl produce_d0 -+** fmov x10, d0 -+** smstart sm -+** fmov d0, x10 -+** fmov x10, d0 -+** smstop sm -+** fmov d0, x10 -+** bl consume_d0 -+** ... -+*/ -+void -+test_d0 () [[arm::streaming]] -+{ -+ double res = produce_d0 (); -+ asm volatile (""); -+ consume_d0 (res); -+} -+ -+int8x8_t produce_d0_vec (); -+void consume_d0_vec (int8x8_t); -+ -+/* -+** test_d0_vec: -+** ... -+** smstop sm -+** bl produce_d0_vec -+** ( -+** fmov x10, d0 -+** | -+** umov x10, v0.d\[0\] -+** ) -+** smstart sm -+** fmov d0, x10 -+** ( -+** fmov x10, d0 -+** | -+** umov x10, v0.d\[0\] -+** ) -+** smstop sm -+** fmov d0, x10 -+** bl consume_d0_vec -+** ... -+*/ -+void -+test_d0_vec () [[arm::streaming]] -+{ -+ int8x8_t res = produce_d0_vec (); -+ asm volatile (""); -+ consume_d0_vec (res); -+} -+ -+int8x16_t produce_q0 (); -+void consume_q0 (int8x16_t); -+ -+/* -+** test_q0: -+** ... -+** smstop sm -+** bl produce_q0 -+** str q0, \[sp, #?-16\]! -+** smstart sm -+** ldr q0, \[sp\], #?16 -+** str q0, \[sp, #?-16\]! -+** smstop sm -+** ldr q0, \[sp\], #?16 -+** bl consume_q0 -+** ... -+*/ -+void -+test_q0 () [[arm::streaming]] -+{ -+ int8x16_t res = produce_q0 (); -+ asm volatile (""); -+ consume_q0 (res); -+} -+ -+int8x16x2_t produce_q1 (); -+void consume_q1 (int8x16x2_t); -+ -+/* -+** test_q1: -+** ... 
-+** smstop sm -+** bl produce_q1 -+** stp q0, q1, \[sp, #?-32\]! -+** smstart sm -+** ldp q0, q1, \[sp\], #?32 -+** stp q0, q1, \[sp, #?-32\]! -+** smstop sm -+** ldp q0, q1, \[sp\], #?32 -+** bl consume_q1 -+** ... -+*/ -+void -+test_q1 () [[arm::streaming]] -+{ -+ int8x16x2_t res = produce_q1 (); -+ asm volatile (""); -+ consume_q1 (res); -+} -+ -+int8x16x3_t produce_q2 (); -+void consume_q2 (int8x16x3_t); -+ -+/* -+** test_q2: -+** ... -+** smstop sm -+** bl produce_q2 -+** stp q0, q1, \[sp, #?-48\]! -+** str q2, \[sp, #?32\] -+** smstart sm -+** ldr q2, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?48 -+** stp q0, q1, \[sp, #?-48\]! -+** str q2, \[sp, #?32\] -+** smstop sm -+** ldr q2, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?48 -+** bl consume_q2 -+** ... -+*/ -+void -+test_q2 () [[arm::streaming]] -+{ -+ int8x16x3_t res = produce_q2 (); -+ asm volatile (""); -+ consume_q2 (res); -+} -+ -+int8x16x4_t produce_q3 (); -+void consume_q3 (int8x16x4_t); -+ -+/* -+** test_q3: -+** ... -+** smstop sm -+** bl produce_q3 -+** stp q0, q1, \[sp, #?-64\]! -+** stp q2, q3, \[sp, #?32\] -+** smstart sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?64 -+** stp q0, q1, \[sp, #?-64\]! -+** stp q2, q3, \[sp, #?32\] -+** smstop sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?64 -+** bl consume_q3 -+** ... -+*/ -+void -+test_q3 () [[arm::streaming]] -+{ -+ int8x16x4_t res = produce_q3 (); -+ asm volatile (""); -+ consume_q3 (res); -+} -+ -+svint8_t produce_z0 (); -+void consume_z0 (svint8_t); -+ -+/* -+** test_z0: -+** ... -+** smstop sm -+** bl produce_z0 -+** addvl sp, sp, #-1 -+** str z0, \[sp\] -+** smstart sm -+** ldr z0, \[sp\] -+** addvl sp, sp, #1 -+** addvl sp, sp, #-1 -+** str z0, \[sp\] -+** smstop sm -+** ldr z0, \[sp\] -+** addvl sp, sp, #1 -+** bl consume_z0 -+** ... 
-+*/ -+void -+test_z0 () [[arm::streaming]] -+{ -+ svint8_t res = produce_z0 (); -+ asm volatile (""); -+ consume_z0 (res); -+} -+ -+svint8x4_t produce_z3 (); -+void consume_z3 (svint8x4_t); -+ -+/* -+** test_z3: -+** ... -+** smstop sm -+** bl produce_z3 -+** addvl sp, sp, #-4 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** smstart sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** addvl sp, sp, #4 -+** addvl sp, sp, #-4 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** smstop sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** addvl sp, sp, #4 -+** bl consume_z3 -+** ... -+*/ -+void -+test_z3 () [[arm::streaming]] -+{ -+ svint8x4_t res = produce_z3 (); -+ asm volatile (""); -+ consume_z3 (res); -+} -+ -+svbool_t produce_p0 (); -+void consume_p0 (svbool_t); -+ -+/* -+** test_p0: -+** ... -+** smstop sm -+** bl produce_p0 -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** bl consume_p0 -+** ... -+*/ -+void -+test_p0 () [[arm::streaming]] -+{ -+ svbool_t res = produce_p0 (); -+ asm volatile (""); -+ consume_p0 (res); -+} -+ -+void consume_d7 (double, double, double, double, double, double, double, -+ double); -+ -+/* -+** test_d7: -+** ... -+** fmov x10, d0 -+** fmov x11, d1 -+** fmov x12, d2 -+** fmov x13, d3 -+** fmov x14, d4 -+** fmov x15, d5 -+** fmov x16, d6 -+** fmov x17, d7 -+** smstop sm -+** fmov d0, x10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d3, x13 -+** fmov d4, x14 -+** fmov d5, x15 -+** fmov d6, x16 -+** fmov d7, x17 -+** bl consume_d7 -+** ... 
-+*/ -+void -+test_d7 () [[arm::streaming]] -+{ -+ consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); -+} -+ -+void consume_d7_vec (int8x8_t, int8x8_t, int8x8_t, int8x8_t, int8x8_t, -+ int8x8_t, int8x8_t, int8x8_t); -+ -+/* -+** test_d7_vec: -+** ... -+** ( -+** fmov x10, d0 -+** fmov x11, d1 -+** fmov x12, d2 -+** fmov x13, d3 -+** fmov x14, d4 -+** fmov x15, d5 -+** fmov x16, d6 -+** fmov x17, d7 -+** | -+** umov x10, v0.d\[0\] -+** umov x11, v1.d\[0\] -+** umov x12, v2.d\[0\] -+** umov x13, v3.d\[0\] -+** umov x14, v4.d\[0\] -+** umov x15, v5.d\[0\] -+** umov x16, v6.d\[0\] -+** umov x17, v7.d\[0\] -+** ) -+** smstop sm -+** fmov d0, x10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d3, x13 -+** fmov d4, x14 -+** fmov d5, x15 -+** fmov d6, x16 -+** fmov d7, x17 -+** bl consume_d7_vec -+** ... -+*/ -+void -+test_d7_vec (int8x8_t *ptr) [[arm::streaming]] -+{ -+ consume_d7_vec (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); -+} -+ -+void consume_q7 (int8x16_t, int8x16_t, int8x16_t, int8x16_t, int8x16_t, -+ int8x16_t, int8x16_t, int8x16_t); -+ -+/* -+** test_q7: -+** ... -+** stp q0, q1, \[sp, #?-128\]! -+** stp q2, q3, \[sp, #?32\] -+** stp q4, q5, \[sp, #?64\] -+** stp q6, q7, \[sp, #?96\] -+** smstop sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q4, q5, \[sp, #?64\] -+** ldp q6, q7, \[sp, #?96\] -+** ldp q0, q1, \[sp\], #?128 -+** bl consume_q7 -+** ... -+*/ -+void -+test_q7 (int8x16_t *ptr) [[arm::streaming]] -+{ -+ consume_q7 (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); -+} -+ -+void consume_z7 (svint8_t, svint8_t, svint8_t, svint8_t, svint8_t, -+ svint8_t, svint8_t, svint8_t); -+ -+/* -+** test_z7: -+** ... 
-+** addvl sp, sp, #-8 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** str z4, \[sp, #4, mul vl\] -+** str z5, \[sp, #5, mul vl\] -+** str z6, \[sp, #6, mul vl\] -+** str z7, \[sp, #7, mul vl\] -+** smstop sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** ldr z4, \[sp, #4, mul vl\] -+** ldr z5, \[sp, #5, mul vl\] -+** ldr z6, \[sp, #6, mul vl\] -+** ldr z7, \[sp, #7, mul vl\] -+** addvl sp, sp, #8 -+** bl consume_z7 -+** ... -+*/ -+void -+test_z7 (svint8_t *ptr) [[arm::streaming]] -+{ -+ consume_z7 (*ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr, *ptr); -+} -+ -+void consume_p3 (svbool_t, svbool_t, svbool_t, svbool_t); -+ -+/* -+** test_p3: -+** ... -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** str p1, \[sp, #1, mul vl\] -+** str p2, \[sp, #2, mul vl\] -+** str p3, \[sp, #3, mul vl\] -+** smstop sm -+** ldr p0, \[sp\] -+** ldr p1, \[sp, #1, mul vl\] -+** ldr p2, \[sp, #2, mul vl\] -+** ldr p3, \[sp, #3, mul vl\] -+** addvl sp, sp, #1 -+** bl consume_p3 -+** ... -+*/ -+void -+test_p3 (svbool_t *ptr) [[arm::streaming]] -+{ -+ consume_p3 (*ptr, *ptr, *ptr, *ptr); -+} -+ -+void consume_mixed (float, double, float32x4_t, svfloat32_t, -+ float, double, float64x2_t, svfloat64_t, -+ svbool_t, svbool_t, svbool_t, svbool_t); -+ -+/* -+** test_mixed: -+** ... -+** addvl sp, sp, #-3 -+** str p0, \[sp\] -+** str p1, \[sp, #1, mul vl\] -+** str p2, \[sp, #2, mul vl\] -+** str p3, \[sp, #3, mul vl\] -+** str z3, \[sp, #1, mul vl\] -+** str z7, \[sp, #2, mul vl\] -+** stp q2, q6, \[sp, #?-32\]! 
-+** fmov w10, s0 -+** fmov x11, d1 -+** fmov w12, s4 -+** fmov x13, d5 -+** smstop sm -+** fmov s0, w10 -+** fmov d1, x11 -+** fmov s4, w12 -+** fmov d5, x13 -+** ldp q2, q6, \[sp\], #?32 -+** ldr p0, \[sp\] -+** ldr p1, \[sp, #1, mul vl\] -+** ldr p2, \[sp, #2, mul vl\] -+** ldr p3, \[sp, #3, mul vl\] -+** ldr z3, \[sp, #1, mul vl\] -+** ldr z7, \[sp, #2, mul vl\] -+** addvl sp, sp, #3 -+** bl consume_mixed -+** ... -+*/ -+void -+test_mixed (float32x4_t *float32x4_ptr, -+ svfloat32_t *svfloat32_ptr, -+ float64x2_t *float64x2_ptr, -+ svfloat64_t *svfloat64_ptr, -+ svbool_t *svbool_ptr) [[arm::streaming]] -+{ -+ consume_mixed (1.0f, 2.0, *float32x4_ptr, *svfloat32_ptr, -+ 3.0f, 4.0, *float64x2_ptr, *svfloat64_ptr, -+ *svbool_ptr, *svbool_ptr, *svbool_ptr, *svbool_ptr); -+} -+ -+void consume_varargs (float, ...); -+ -+/* -+** test_varargs: -+** ... -+** stp q3, q7, \[sp, #?-32\]! -+** fmov w10, s0 -+** fmov x11, d1 -+** ( -+** fmov x12, d2 -+** | -+** umov x12, v2.d\[0\] -+** ) -+** fmov x13, d4 -+** fmov x14, d5 -+** ( -+** fmov x15, d6 -+** | -+** umov x15, v6.d\[0\] -+** ) -+** smstop sm -+** fmov s0, w10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d4, x13 -+** fmov d5, x14 -+** fmov d6, x15 -+** ldp q3, q7, \[sp\], #?32 -+** bl consume_varargs -+** ... 
-+*/ -+void -+test_varargs (float32x2_t *float32x2_ptr, -+ float32x4_t *float32x4_ptr, -+ float64x1_t *float64x1_ptr, -+ float64x2_t *float64x2_ptr) [[arm::streaming]] -+{ -+ consume_varargs (1.0f, 2.0, *float32x2_ptr, *float32x4_ptr, -+ 3.0f, 4.0, *float64x1_ptr, *float64x2_ptr); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c -new file mode 100644 -index 000000000..f44724df3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c -@@ -0,0 +1,87 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -msve-vector-bits=128" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+ -+svint8_t produce_z0 (); -+void consume_z0 (svint8_t); -+ -+/* -+** test_z0: -+** ... -+** smstop sm -+** bl produce_z0 -+** str q0, \[sp, #?-16\]! -+** smstart sm -+** ldr q0, \[sp\], #?16 -+** str q0, \[sp, #?-16\]! -+** smstop sm -+** ldr q0, \[sp\], #?16 -+** bl consume_z0 -+** ... -+*/ -+void -+test_z0 () [[arm::streaming]] -+{ -+ svint8_t res = produce_z0 (); -+ asm volatile (""); -+ consume_z0 (res); -+} -+ -+svint8x4_t produce_z3 (); -+void consume_z3 (svint8x4_t); -+ -+/* -+** test_z3: -+** ... -+** smstop sm -+** bl produce_z3 -+** stp q0, q1, \[sp, #?-64\]! -+** stp q2, q3, \[sp, #?32\] -+** smstart sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?64 -+** stp q0, q1, \[sp, #?-64\]! -+** stp q2, q3, \[sp, #?32\] -+** smstop sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?64 -+** bl consume_z3 -+** ... -+*/ -+void -+test_z3 () [[arm::streaming]] -+{ -+ svint8x4_t res = produce_z3 (); -+ asm volatile (""); -+ consume_z3 (res); -+} -+ -+svbool_t produce_p0 (); -+void consume_p0 (svbool_t); -+ -+/* -+** test_p0: -+** ... 
-+** smstop sm -+** bl produce_p0 -+** sub sp, sp, #?16 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** add sp, sp, #?16 -+** sub sp, sp, #?16 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** add sp, sp, #?16 -+** bl consume_p0 -+** ... -+*/ -+void -+test_p0 () [[arm::streaming]] -+{ -+ svbool_t res = produce_p0 (); -+ asm volatile (""); -+ consume_p0 (res); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c -new file mode 100644 -index 000000000..83b4073ee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_9.c -@@ -0,0 +1,103 @@ -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -msve-vector-bits=256" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+ -+svint8_t produce_z0 (); -+void consume_z0 (svint8_t); -+ -+/* -+** test_z0: -+** ... -+** smstop sm -+** bl produce_z0 -+** sub sp, sp, #?32 -+** str z0, \[sp\] -+** smstart sm -+** ldr z0, \[sp\] -+** add sp, sp, #?32 -+** sub sp, sp, #?32 -+** str z0, \[sp\] -+** smstop sm -+** ldr z0, \[sp\] -+** add sp, sp, #?32 -+** bl consume_z0 -+** ... -+*/ -+void -+test_z0 () [[arm::streaming]] -+{ -+ svint8_t res = produce_z0 (); -+ asm volatile (""); -+ consume_z0 (res); -+} -+ -+svint8x4_t produce_z3 (); -+void consume_z3 (svint8x4_t); -+ -+/* -+** test_z3: -+** ... 
-+** smstop sm -+** bl produce_z3 -+** sub sp, sp, #?128 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** smstart sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** add sp, sp, #?128 -+** sub sp, sp, #?128 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** smstop sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** add sp, sp, #?128 -+** bl consume_z3 -+** ... -+*/ -+void -+test_z3 () [[arm::streaming]] -+{ -+ svint8x4_t res = produce_z3 (); -+ asm volatile (""); -+ consume_z3 (res); -+} -+ -+svbool_t produce_p0 (); -+void consume_p0 (svbool_t); -+ -+/* -+** test_p0: -+** ... -+** smstop sm -+** bl produce_p0 -+** sub sp, sp, #?32 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** add sp, sp, #?32 -+** sub sp, sp, #?32 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** add sp, sp, #?32 -+** bl consume_p0 -+** ... -+*/ -+void -+test_p0 () [[arm::streaming]] -+{ -+ svbool_t res = produce_p0 (); -+ asm volatile (""); -+ consume_p0 (res); -+} --- -2.33.0 - diff --git a/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch b/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch deleted file mode 100644 index f15e7f6..0000000 --- a/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch +++ /dev/null @@ -1,4324 +0,0 @@ -From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:26 +0000 -Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA - attributes - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f - -SME has an array called ZA that can be enabled and disabled separately -from streaming mode. 
A status bit called PSTATE.ZA indicates whether -ZA is currently enabled or not. - -In C and C++, the state of PSTATE.ZA is controlled using function -attributes. There are four attributes that can be attached to -function types to indicate that the function shares ZA with its -caller. These are: - -- arm::in("za") -- arm::out("za") -- arm::inout("za") -- arm::preserves("za") - -If a function's type has one of these shared-ZA attributes, -PSTATE.ZA is specified to be 1 on entry to the function and on return -from the function. Otherwise, the caller and callee have separate -ZA contexts; they do not use ZA to share data. - -Although normal non-shared-ZA functions have a separate ZA context -from their callers, nested uses of ZA are expected to be rare. -The ABI therefore defines a cooperative lazy saving scheme that -allows saves and restore of ZA to be kept to a minimum. -(Callers still have the option of doing a full save and restore -if they prefer.) - -Functions that want to use ZA internally have an arm::new("za") -attribute, which tells the compiler to enable PSTATE.ZA for -the duration of the function body. It also tells the compiler -to commit any lazy save initiated by a caller. - -The patch uses various abstract hard registers to track dataflow -relating to ZA. See the comments in the patch for details. - -The lazy save scheme is intended to be transparent to most normal -functions, so that they don't need to be recompiled for SME. -This is reflected in the way that most normal functions ignore -the new hard registers added in the patch. - -As with arm::streaming and arm::streaming_compatible, the attributes are -also available as __arm_. This has two advantages: it triggers an -error on compilers that don't understand the attributes, and it eases -use on C, where [[...]] attributes were only added in C23. - -gcc/ - * config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode. 
- * config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p) - (aarch64_output_rdsvl, aarch64_optimize_mode_switching) - (aarch64_restore_za): Declare. - * config/aarch64/constraints.md (UsR): New constraint. - * config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM) - (SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM) - (ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants. - (LAST_FAKE_REGNUM): Likewise. - (UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs. - (arches): Add sme. - (arch_enabled): Handle it. - (*cb1): Rename to... - (aarch64_cb1): ...this. - (*movsi_aarch64): Add an alternative for RDSVL. - (*movdi_aarch64): Likewise. - (aarch64_save_nzcv, aarch64_restore_nzcv): New insns. - * config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA) - (UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE) - (UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2) - (UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs. - (UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise. - (UNSPECV_ASM_UPDATE_ZA): New unspecv. - (aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za) - (aarch64_initial_zero_za, aarch64_setup_local_tpidr2) - (aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2) - (aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za) - (aarch64_start_private_za_call, aarch64_end_private_za_call) - (aarch64_commit_lazy_save): New patterns. - * config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros. - (FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers. - (CALL_USED_REGISTERS): Replace with... - (CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers. - (FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers. - (FAKE_REGS): New register class. - (REG_CLASS_NAMES): Update accordingly. - (REG_CLASS_CONTENTS): Likewise. - (machine_function::tpidr2_block): New member variable. 
- (machine_function::tpidr2_block_ptr): Likewise. - (machine_function::za_save_buffer): Likewise. - (machine_function::next_asm_update_za_id): Likewise. - (CUMULATIVE_ARGS::shared_za_flags): Likewise. - (aarch64_mode_entity, aarch64_local_sme_state): New enums. - (aarch64_tristate_mode): Likewise. - (OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define. - * config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN) - (AARCH64_STATE_OUT): New constants. - (aarch64_attribute_shared_state_flags): New function. - (aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state) - (aarch64_check_state_string, cmp_string_csts): Likewise. - (aarch64_merge_string_arguments, aarch64_check_arm_new_against_type) - (handle_arm_new, handle_arm_shared): Likewise. - (handle_arm_new_za_attribute): New - (aarch64_arm_attribute_table): Add new, preserves, in, out, and inout. - (aarch64_hard_regno_nregs): Handle FAKE_REGS. - (aarch64_hard_regno_mode_ok): Likewise. - (aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions. - (aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za. - (aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions. - (aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za. - (aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags) - (aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions. - (aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise. - (aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise. - (aarch64_expand_mov_immediate): Handle RDSVL immediates. - (aarch64_function_arg): Add the ZA sharing flags as a third limb - of the PARALLEL. - (aarch64_init_cumulative_args): Record the ZA sharing flags. - (aarch64_extra_live_on_entry): New function. Handle the new - ZA-related fake registers. - (aarch64_epilogue_uses): Handle the new ZA-related fake registers. - (aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants. 
- (aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions. - (aarch64_init_tpidr2_block, aarch64_restore_za): Likewise. - (aarch64_layout_frame): Check whether the current function creates - new ZA state. Record that it clobbers LR if so. - (aarch64_expand_prologue): Handle functions that create new ZA state. - (aarch64_expand_epilogue): Likewise. - (aarch64_create_tpidr2_block): New function. - (aarch64_restore_za): Likewise. - (aarch64_start_call_args): Disallow calls to shared-ZA functions - from functions that have no ZA state. Emit a marker instruction - before calls to private-ZA functions from functions that have - SME state. - (aarch64_expand_call): Add return registers for state that is - managed via attributes. Record the use and clobber information - for the ZA registers. - (aarch64_end_call_args): New function. - (aarch64_regno_regclass): Handle FAKE_REGS. - (aarch64_class_max_nregs): Likewise. - (aarch64_override_options_internal): Require TARGET_SME for - functions that have ZA state. - (aarch64_conditional_register_usage): Handle FAKE_REGS. - (aarch64_mov_operand_p): Handle RDSVL immediates. - (aarch64_comp_type_attributes): Check that the ZA sharing flags - are equal. - (aarch64_merge_decl_attributes): New function. - (aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer) - (aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise. - (aarch64_insn_references_sme_state_p): Likewise. - (aarch64_mode_needed_local_sme_state): Likewise. - (aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise. - (aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise. - (aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise. - (aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise. - (aarch64_mode_backprop, aarch64_mode_entry): Likewise. - (aarch64_mode_exit, aarch64_mode_eh_handler): Likewise. - (aarch64_mode_priority, aarch64_md_asm_adjust): Likewise. 
- (TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define. - (TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise. - (TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise. - (TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise. - (TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise. - (TARGET_EXTRA_LIVE_ON_ENTRY): Likewise. - (TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust. - * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): - Define __arm_new, __arm_preserves,__arm_in, __arm_out, and __arm_inout. - -gcc/testsuite/ - * gcc.target/aarch64/sme/za_state_1.c: New test. - * gcc.target/aarch64/sme/za_state_2.c: Likewise. - * gcc.target/aarch64/sme/za_state_3.c: Likewise. - * gcc.target/aarch64/sme/za_state_4.c: Likewise. - * gcc.target/aarch64/sme/za_state_5.c: Likewise. - * gcc.target/aarch64/sme/za_state_6.c: Likewise. - * g++.target/aarch64/sme/exceptions_1.C: Likewise. - * gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros. - * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. 
---- - gcc/config/aarch64/aarch64-c.cc | 32 + - gcc/config/aarch64/aarch64-isa-modes.def | 5 + - gcc/config/aarch64/aarch64-protos.h | 5 + - gcc/config/aarch64/aarch64-sme.md | 287 ++++ - gcc/config/aarch64/aarch64.cc | 1371 ++++++++++++++++- - gcc/config/aarch64/aarch64.h | 98 +- - gcc/config/aarch64/aarch64.md | 81 +- - gcc/config/aarch64/constraints.md | 6 + - .../g++.target/aarch64/sme/exceptions_1.C | 189 +++ - .../g++.target/aarch64/sme/keyword_macros_1.C | 5 + - .../gcc.target/aarch64/sme/keyword_macros_1.c | 5 + - .../gcc.target/aarch64/sme/za_state_1.c | 154 ++ - .../gcc.target/aarch64/sme/za_state_2.c | 73 + - .../gcc.target/aarch64/sme/za_state_3.c | 31 + - .../gcc.target/aarch64/sme/za_state_4.c | 585 +++++++ - .../gcc.target/aarch64/sme/za_state_5.c | 595 +++++++ - .../gcc.target/aarch64/sme/za_state_6.c | 23 + - 17 files changed, 3523 insertions(+), 22 deletions(-) - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 397745fbd..76c20848f 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -73,6 +73,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - - builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); - -+ builtin_define ("__ARM_STATE_ZA"); -+ - /* Define keyword attributes like __arm_streaming as macros that expand - to the associated [[...]] attribute. Use __extension__ in the attribute - for C, since the [[...]] syntax was only added in C23. 
*/ -@@ -86,6 +88,36 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible"); - - #undef DEFINE_ARM_KEYWORD_MACRO -+ -+ /* Same for the keyword attributes that take arguments. The snag here -+ is that some old modes warn about or reject variadic arguments. */ -+ auto *cpp_opts = cpp_get_options (parse_in); -+ if (!cpp_opts->traditional) -+ { -+ auto old_warn_variadic_macros = cpp_opts->warn_variadic_macros; -+ auto old_cpp_warn_c90_c99_compat = cpp_opts->cpp_warn_c90_c99_compat; -+ -+ cpp_opts->warn_variadic_macros = false; -+ cpp_opts->cpp_warn_c90_c99_compat = 0; -+ -+#define DEFINE_ARM_KEYWORD_MACRO_ARGS(NAME) \ -+ builtin_define_with_value ("__arm_" NAME "(...)", \ -+ lang_GNU_CXX () \ -+ ? "[[arm::" NAME "(__VA_ARGS__)]]" \ -+ : "[[__extension__ arm::" NAME \ -+ "(__VA_ARGS__)]]", 0); -+ -+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("new"); -+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("preserves"); -+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("in"); -+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("out"); -+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("inout"); -+ -+#undef DEFINE_ARM_KEYWORD_MACRO_ARGS -+ -+ cpp_opts->warn_variadic_macros = old_warn_variadic_macros; -+ cpp_opts->cpp_warn_c90_c99_compat = old_cpp_warn_c90_c99_compat; -+ } - } - - /* Undefine/redefine macros that depend on the current backend state and may -diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def -index 5915c98a8..c0ada35bd 100644 ---- a/gcc/config/aarch64/aarch64-isa-modes.def -+++ b/gcc/config/aarch64/aarch64-isa-modes.def -@@ -32,4 +32,9 @@ - DEF_AARCH64_ISA_MODE(SM_ON) - DEF_AARCH64_ISA_MODE(SM_OFF) - -+/* Indicates that PSTATE.ZA is known to be 1. The converse is that -+ PSTATE.ZA might be 0 or 1, depending on whether there is an uncommitted -+ lazy save. 
*/ -+DEF_AARCH64_ISA_MODE(ZA_ON) -+ - #undef DEF_AARCH64_ISA_MODE -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 737f47026..0883ddd1a 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -808,6 +808,8 @@ bool aarch64_sve_addvl_addpl_immediate_p (rtx); - bool aarch64_sve_vector_inc_dec_immediate_p (rtx); - int aarch64_add_offset_temporaries (rtx); - void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); -+bool aarch64_rdsvl_immediate_p (const_rtx); -+char *aarch64_output_rdsvl (const_rtx); - bool aarch64_mov_operand_p (rtx, machine_mode); - rtx aarch64_reverse_mask (machine_mode, unsigned int); - bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); -@@ -1083,4 +1085,7 @@ extern bool aarch64_harden_sls_blr_p (void); - - extern void aarch64_output_patchable_area (unsigned int, bool); - -+bool aarch64_optimize_mode_switching (aarch64_mode_entity); -+void aarch64_restore_za (rtx); -+ - #endif /* GCC_AARCH64_PROTOS_H */ -diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md -index 52427b4f1..d4973098e 100644 ---- a/gcc/config/aarch64/aarch64-sme.md -+++ b/gcc/config/aarch64/aarch64-sme.md -@@ -23,6 +23,7 @@ - ;; == State management - ;; ---- Test current state - ;; ---- PSTATE.SM management -+;; ---- PSTATE.ZA management - - ;; ========================================================================= - ;; == State management -@@ -169,3 +170,289 @@ - "" - "smstop\tsm" - ) -+ -+;; ------------------------------------------------------------------------- -+;; ---- PSTATE.ZA management -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SMSTART ZA -+;; - SMSTOP ZA -+;; plus calls to support routines. 
-+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [ -+ UNSPEC_SMSTOP_ZA -+ UNSPEC_INITIAL_ZERO_ZA -+ UNSPEC_TPIDR2_SAVE -+ UNSPEC_TPIDR2_RESTORE -+ UNSPEC_READ_TPIDR2 -+ UNSPEC_WRITE_TPIDR2 -+ UNSPEC_SETUP_LOCAL_TPIDR2 -+ UNSPEC_RESTORE_ZA -+ UNSPEC_START_PRIVATE_ZA_CALL -+ UNSPEC_END_PRIVATE_ZA_CALL -+ UNSPEC_COMMIT_LAZY_SAVE -+]) -+ -+(define_c_enum "unspecv" [ -+ UNSPECV_ASM_UPDATE_ZA -+]) -+ -+;; Use the ABI-defined routine to commit an uncommitted lazy save. -+;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM. -+;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming -+;; value of the architected TPIDR2_EL0. -+(define_insn "aarch64_tpidr2_save" -+ [(set (reg:DI ZA_FREE_REGNUM) -+ (unspec:DI [(reg:DI SME_STATE_REGNUM) -+ (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE)) -+ (clobber (reg:DI R14_REGNUM)) -+ (clobber (reg:DI R15_REGNUM)) -+ (clobber (reg:DI R16_REGNUM)) -+ (clobber (reg:DI R17_REGNUM)) -+ (clobber (reg:DI R18_REGNUM)) -+ (clobber (reg:DI R30_REGNUM)) -+ (clobber (reg:CC CC_REGNUM))] -+ "" -+ "bl\t__arm_tpidr2_save" -+) -+ -+;; Set PSTATE.ZA to 1. If ZA was previously dormant or active, -+;; it remains in the same state afterwards, with the same contents. -+;; Otherwise, it goes from off to on with zeroed contents. -+;; -+;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved -+;; up past this instruction, since that could create an invalid -+;; combination of having an active lazy save while ZA is off. -+;; Create an anti-dependence by reading the current contents -+;; of TPIDR2_SETUP_REGNUM. -+;; -+;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging -+;; to the caller have already been saved. That isn't necessary for this -+;; instruction itself, since PSTATE.ZA is already 1 if it contains data. 
-+;; But doing this here means that other uses of ZA can just depend on -+;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM. -+(define_insn "aarch64_smstart_za" -+ [(set (reg:DI SME_STATE_REGNUM) -+ (const_int 1)) -+ (use (reg:DI TPIDR2_SETUP_REGNUM)) -+ (use (reg:DI ZA_FREE_REGNUM))] -+ "" -+ "smstart\tza" -+) -+ -+;; Disable ZA and discard its current contents. -+;; -+;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA -+;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past -+;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this. -+;; -+;; We can only turn ZA off once we know that it is free (i.e. doesn't -+;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM -+;; to ensure this. -+;; -+;; We only turn ZA off when the current function's ZA state is dead, -+;; or perhaps if we're sure that the contents are saved. Either way, -+;; we know whether ZA is saved or not. -+(define_insn "aarch64_smstop_za" -+ [(set (reg:DI SME_STATE_REGNUM) -+ (const_int 0)) -+ (set (reg:DI ZA_SAVED_REGNUM) -+ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) -+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))] -+ "" -+ "smstop\tza" -+) -+ -+;; Zero ZA after committing a lazy save. The sequencing is enforced -+;; by reading ZA_FREE_REGNUM. -+(define_insn "aarch64_initial_zero_za" -+ [(set (reg:DI ZA_REGNUM) -+ (unspec:DI [(reg:DI SME_STATE_REGNUM) -+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))] -+ "" -+ "zero\t{ za }" -+) -+ -+;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of -+;; the current function's TPIDR2 block. Other instructions can then -+;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block. 
-+(define_insn "aarch64_setup_local_tpidr2" -+ [(set (reg:DI TPIDR2_BLOCK_REGNUM) -+ (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")] -+ UNSPEC_SETUP_LOCAL_TPIDR2))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save. -+(define_insn "aarch64_clear_tpidr2" -+ [(set (reg:DI TPIDR2_SETUP_REGNUM) -+ (const_int 0))] -+ "" -+ "msr\ttpidr2_el0, xzr" -+) -+ -+;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address -+;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the -+;; pointed-to block. -+(define_insn "aarch64_write_tpidr2" -+ [(set (reg:DI TPIDR2_SETUP_REGNUM) -+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") -+ (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))] -+ "" -+ "msr\ttpidr2_el0, %0" -+) -+ -+;; Check whether ZA has been saved. The system depends on the value that -+;; we wrote to TPIDR2_EL0 previously, so it depends on TPDIR2_SETUP_REGNUM. -+(define_insn "aarch64_read_tpidr2" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM) -+ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))] -+ "" -+ "mrs\t%0, tpidr2_el0" -+) -+ -+;; Use the ABI-defined routine to restore lazy-saved ZA contents -+;; from the TPIDR2 block pointed to by X0. ZA must already be active. -+(define_insn "aarch64_tpidr2_restore" -+ [(set (reg:DI ZA_SAVED_REGNUM) -+ (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE)) -+ (set (reg:DI SME_STATE_REGNUM) -+ (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE)) -+ (clobber (reg:DI R14_REGNUM)) -+ (clobber (reg:DI R15_REGNUM)) -+ (clobber (reg:DI R16_REGNUM)) -+ (clobber (reg:DI R17_REGNUM)) -+ (clobber (reg:DI R18_REGNUM)) -+ (clobber (reg:DI R30_REGNUM)) -+ (clobber (reg:CC CC_REGNUM))] -+ "" -+ "bl\t__arm_tpidr2_restore" -+) -+ -+;; Check whether a lazy save set up by aarch64_save_za was committed -+;; and restore the saved contents if so. 
-+;; -+;; Operand 0 is the address of the current function's TPIDR2 block. -+(define_insn_and_split "aarch64_restore_za" -+ [(set (reg:DI ZA_SAVED_REGNUM) -+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") -+ (reg:DI SME_STATE_REGNUM) -+ (reg:DI TPIDR2_SETUP_REGNUM) -+ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA)) -+ (clobber (reg:DI R0_REGNUM)) -+ (clobber (reg:DI R14_REGNUM)) -+ (clobber (reg:DI R15_REGNUM)) -+ (clobber (reg:DI R16_REGNUM)) -+ (clobber (reg:DI R17_REGNUM)) -+ (clobber (reg:DI R18_REGNUM)) -+ (clobber (reg:DI R30_REGNUM)) -+ (clobber (reg:CC CC_REGNUM))] -+ "" -+ "#" -+ "&& epilogue_completed" -+ [(const_int 0)] -+ { -+ auto label = gen_label_rtx (); -+ auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); -+ emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); -+ auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label)); -+ JUMP_LABEL (jump) = label; -+ -+ aarch64_restore_za (operands[0]); -+ emit_label (label); -+ DONE; -+ } -+) -+ -+;; This instruction is emitted after asms that alter ZA, in order to model -+;; the effect on dataflow. The asm itself can't have ZA as an input or -+;; an output, since there is no associated data type. Instead it retains -+;; the original "za" clobber, which on its own would indicate that ZA -+;; is dead. -+;; -+;; The operand is a unique identifier. -+(define_insn "aarch64_asm_update_za" -+ [(set (reg:VNx16QI ZA_REGNUM) -+ (unspec_volatile:VNx16QI -+ [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand 0 "const_int_operand")] -+ UNSPECV_ASM_UPDATE_ZA))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; This pseudo-instruction is emitted as part of a call to a private-ZA -+;; function from a function with ZA state. It marks a natural place to set -+;; up a lazy save, if that turns out to be necessary. The save itself -+;; is managed by the mode-switching pass. 
-+(define_insn "aarch64_start_private_za_call" -+ [(set (reg:DI LOWERING_REGNUM) -+ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; This pseudo-instruction is emitted as part of a call to a private-ZA -+;; function from a function with ZA state. It marks a natural place to restore -+;; the current function's ZA contents from the lazy save buffer, if that -+;; turns out to be necessary. The save itself is managed by the -+;; mode-switching pass. -+(define_insn "aarch64_end_private_za_call" -+ [(set (reg:DI LOWERING_REGNUM) -+ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))] -+ "" -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; This pseudo-instruction is emitted before a private-ZA function uses -+;; PSTATE.ZA state for the first time. The instruction checks whether -+;; ZA currently contains data belonging to a caller and commits the -+;; lazy save if so. -+;; -+;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero -+;; if ZA is live, and should therefore be zeroed after committing a save. -+;; -+;; The instruction is generated by the mode-switching pass. It is a -+;; define_insn_and_split rather than a define_expand because of the -+;; internal control flow. 
-+(define_insn_and_split "aarch64_commit_lazy_save" -+ [(set (reg:DI ZA_FREE_REGNUM) -+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r") -+ (match_operand 1 "const_int_operand") -+ (reg:DI SME_STATE_REGNUM) -+ (reg:DI TPIDR2_SETUP_REGNUM) -+ (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE)) -+ (set (reg:DI ZA_REGNUM) -+ (unspec:DI [(reg:DI SME_STATE_REGNUM) -+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA)) -+ (clobber (reg:DI R14_REGNUM)) -+ (clobber (reg:DI R15_REGNUM)) -+ (clobber (reg:DI R16_REGNUM)) -+ (clobber (reg:DI R17_REGNUM)) -+ (clobber (reg:DI R18_REGNUM)) -+ (clobber (reg:DI R30_REGNUM)) -+ (clobber (reg:CC CC_REGNUM))] -+ "" -+ "#" -+ "true" -+ [(const_int 0)] -+ { -+ auto label = gen_label_rtx (); -+ auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label)); -+ JUMP_LABEL (jump) = label; -+ emit_insn (gen_aarch64_tpidr2_save ()); -+ emit_insn (gen_aarch64_clear_tpidr2 ()); -+ if (INTVAL (operands[1]) != 0) -+ emit_insn (gen_aarch64_initial_zero_za ()); -+ emit_label (label); -+ DONE; -+ } -+) -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 82f8e574e..a6e996c5b 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -91,6 +91,26 @@ - /* Defined for convenience. */ - #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) - -+/* Flags that describe how a function shares certain architectural state -+ with its callers. -+ -+ - AARCH64_STATE_SHARED indicates that the function does share the state -+ with callers. -+ -+ - AARCH64_STATE_IN indicates that the function reads (or might read) the -+ incoming state. The converse is that the function ignores the incoming -+ state. -+ -+ - AARCH64_STATE_OUT indicates that the function returns new state. -+ The converse is that the state on return is the same as it was on entry. 
-+ -+ A function that partially modifies the state treats it as both IN -+ and OUT (because the value on return depends to some extent on the -+ value on input). */ -+constexpr auto AARCH64_STATE_SHARED = 1U << 0; -+constexpr auto AARCH64_STATE_IN = 1U << 1; -+constexpr auto AARCH64_STATE_OUT = 1U << 2; -+ - /* Information about a legitimate vector immediate operand. */ - struct simd_immediate_info - { -@@ -2959,6 +2979,151 @@ static const struct processor all_cores[] = - /* The current tuning set. */ - struct tune_params aarch64_tune_params = generic_tunings; - -+/* If NAME is the name of an arm:: attribute that describes shared state, -+ return its associated AARCH64_STATE_* flags, otherwise return 0. */ -+static unsigned int -+aarch64_attribute_shared_state_flags (const char *name) -+{ -+ if (strcmp (name, "in") == 0) -+ return AARCH64_STATE_SHARED | AARCH64_STATE_IN; -+ if (strcmp (name, "inout") == 0) -+ return AARCH64_STATE_SHARED | AARCH64_STATE_IN | AARCH64_STATE_OUT; -+ if (strcmp (name, "out") == 0) -+ return AARCH64_STATE_SHARED | AARCH64_STATE_OUT; -+ if (strcmp (name, "preserves") == 0) -+ return AARCH64_STATE_SHARED; -+ return 0; -+} -+ -+/* See whether attribute list ATTRS has any sharing information -+ for state STATE_NAME. Return the associated state flags if so, -+ otherwise return 0. 
*/ -+static unsigned int -+aarch64_lookup_shared_state_flags (tree attrs, const char *state_name) -+{ -+ for (tree attr = attrs; attr; attr = TREE_CHAIN (attr)) -+ { -+ if (!cxx11_attribute_p (attr)) -+ continue; -+ -+ auto ns = IDENTIFIER_POINTER (TREE_PURPOSE (TREE_PURPOSE (attr))); -+ if (strcmp (ns, "arm") != 0) -+ continue; -+ -+ auto attr_name = IDENTIFIER_POINTER (TREE_VALUE (TREE_PURPOSE (attr))); -+ auto flags = aarch64_attribute_shared_state_flags (attr_name); -+ if (!flags) -+ continue; -+ -+ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) -+ { -+ tree value = TREE_VALUE (arg); -+ if (TREE_CODE (value) == STRING_CST -+ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) -+ return flags; -+ } -+ } -+ return 0; -+} -+ -+/* Return true if DECL creates a new scope for state STATE_STRING. */ -+static bool -+aarch64_fndecl_has_new_state (const_tree decl, const char *state_name) -+{ -+ if (tree attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl))) -+ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg)) -+ { -+ tree value = TREE_VALUE (arg); -+ if (TREE_CODE (value) == STRING_CST -+ && strcmp (TREE_STRING_POINTER (value), state_name) == 0) -+ return true; -+ } -+ return false; -+} -+ -+/* Return true if attribute argument VALUE is a recognized state string, -+ otherwise report an error. NAME is the name of the attribute to which -+ VALUE is being passed. */ -+static bool -+aarch64_check_state_string (tree name, tree value) -+{ -+ if (TREE_CODE (value) != STRING_CST) -+ { -+ error ("the arguments to %qE must be constant strings", name); -+ return false; -+ } -+ -+ const char *state_name = TREE_STRING_POINTER (value); -+ if (strcmp (state_name, "za") != 0) -+ { -+ error ("unrecognized state string %qs", state_name); -+ return false; -+ } -+ -+ return true; -+} -+ -+/* qsort callback to compare two STRING_CSTs. 
*/ -+static int -+cmp_string_csts (const void *a, const void *b) -+{ -+ return strcmp (TREE_STRING_POINTER (*(const_tree const *) a), -+ TREE_STRING_POINTER (*(const_tree const *) b)); -+} -+ -+/* Canonicalize a list of state strings. ARGS contains the arguments to -+ a new attribute while OLD_ATTR, if nonnull, contains a previous attribute -+ of the same type. If CAN_MERGE_IN_PLACE, it is safe to adjust OLD_ATTR's -+ arguments and drop the new attribute. Otherwise, the new attribute must -+ be kept and ARGS must include the information in OLD_ATTR. -+ -+ In both cases, the new arguments must be a sorted list of state strings -+ with duplicates removed. -+ -+ Return true if new attribute should be kept, false if it should be -+ dropped. */ -+static bool -+aarch64_merge_string_arguments (tree args, tree old_attr, -+ bool can_merge_in_place) -+{ -+ /* Get a sorted list of all state strings (including duplicates). */ -+ auto add_args = [](vec &strings, const_tree args) -+ { -+ for (const_tree arg = args; arg; arg = TREE_CHAIN (arg)) -+ if (TREE_CODE (TREE_VALUE (arg)) == STRING_CST) -+ strings.safe_push (TREE_VALUE (arg)); -+ }; -+ auto_vec strings; -+ add_args (strings, args); -+ if (old_attr) -+ add_args (strings, TREE_VALUE (old_attr)); -+ strings.qsort (cmp_string_csts); -+ -+ /* The list can be empty if there was no previous attribute and if all -+ the new arguments are erroneous. Drop the attribute in that case. */ -+ if (strings.is_empty ()) -+ return false; -+ -+ /* Destructively modify one of the argument lists, removing duplicates -+ on the fly. */ -+ bool use_old_attr = old_attr && can_merge_in_place; -+ tree *end = use_old_attr ? 
&TREE_VALUE (old_attr) : &args; -+ tree prev = NULL_TREE; -+ for (tree arg : strings) -+ { -+ if (prev && simple_cst_equal (arg, prev)) -+ continue; -+ prev = arg; -+ if (!*end) -+ *end = tree_cons (NULL_TREE, arg, NULL_TREE); -+ else -+ TREE_VALUE (*end) = arg; -+ end = &TREE_CHAIN (*end); -+ } -+ *end = NULL_TREE; -+ return !use_old_attr; -+} -+ - /* Check whether an 'aarch64_vector_pcs' attribute is valid. */ - - static tree -@@ -2987,6 +3152,101 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, - gcc_unreachable (); - } - -+/* Return true if arm::new(ARGS) is compatible with the type of decl DECL, -+ otherwise report an error. */ -+static bool -+aarch64_check_arm_new_against_type (tree args, tree decl) -+{ -+ tree type_attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -+ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) -+ { -+ tree value = TREE_VALUE (arg); -+ if (TREE_CODE (value) == STRING_CST) -+ { -+ const char *state_name = TREE_STRING_POINTER (value); -+ if (aarch64_lookup_shared_state_flags (type_attrs, state_name)) -+ { -+ error_at (DECL_SOURCE_LOCATION (decl), -+ "cannot create a new %qs scope since %qs is shared" -+ " with callers", state_name, state_name); -+ return false; -+ } -+ } -+ } -+ return true; -+} -+ -+/* Callback for arm::new attributes. 
*/ -+static tree -+handle_arm_new (tree *node, tree name, tree args, int, bool *no_add_attrs) -+{ -+ tree decl = *node; -+ if (TREE_CODE (decl) != FUNCTION_DECL) -+ { -+ error ("%qE attribute applies only to function definitions", name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ if (TREE_TYPE (decl) == error_mark_node) -+ { -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) -+ aarch64_check_state_string (name, TREE_VALUE (arg)); -+ -+ if (!aarch64_check_arm_new_against_type (args, decl)) -+ { -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ /* If there is an old attribute, we should try to update it in-place, -+ so that there is only one (definitive) arm::new attribute on the decl. */ -+ tree old_attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl)); -+ if (!aarch64_merge_string_arguments (args, old_attr, true)) -+ *no_add_attrs = true; -+ -+ return NULL_TREE; -+} -+ -+/* Callback for arm::{in,out,inout,preserves} attributes. */ -+static tree -+handle_arm_shared (tree *node, tree name, tree args, -+ int, bool *no_add_attrs) -+{ -+ tree type = *node; -+ tree old_attrs = TYPE_ATTRIBUTES (type); -+ auto flags = aarch64_attribute_shared_state_flags (IDENTIFIER_POINTER (name)); -+ for (tree arg = args; arg; arg = TREE_CHAIN (arg)) -+ { -+ tree value = TREE_VALUE (arg); -+ if (aarch64_check_state_string (name, value)) -+ { -+ const char *state_name = TREE_STRING_POINTER (value); -+ auto old_flags = aarch64_lookup_shared_state_flags (old_attrs, -+ state_name); -+ if (old_flags && old_flags != flags) -+ { -+ error ("inconsistent attributes for state %qs", state_name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ } -+ } -+ -+ /* We can't update an old attribute in-place, since types are shared. -+ Instead make sure that this new attribute contains all the -+ information, so that the old attribute becomes redundant. 
*/ -+ tree old_attr = lookup_attribute ("arm", IDENTIFIER_POINTER (name), -+ old_attrs); -+ if (!aarch64_merge_string_arguments (args, old_attr, false)) -+ *no_add_attrs = true; -+ -+ return NULL_TREE; -+} -+ - /* Mutually-exclusive function type attributes for controlling PSTATE.SM. */ - static const struct attribute_spec::exclusions attr_streaming_exclusions[] = - { -@@ -3023,6 +3283,16 @@ static const attribute_spec aarch64_arm_attributes[] = - NULL, attr_streaming_exclusions }, - { "streaming_compatible", 0, 0, false, true, true, true, - NULL, attr_streaming_exclusions }, -+ { "new", 1, -1, true, false, false, false, -+ handle_arm_new, NULL }, -+ { "preserves", 1, -1, false, true, true, true, -+ handle_arm_shared, NULL }, -+ { "in", 1, -1, false, true, true, true, -+ handle_arm_shared, NULL }, -+ { "out", 1, -1, false, true, true, true, -+ handle_arm_shared, NULL }, -+ { "inout", 1, -1, false, true, true, true, -+ handle_arm_shared, NULL } - }; - - static const scoped_attribute_specs aarch64_arm_attribute_table = -@@ -4202,6 +4472,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) - case PR_HI_REGS: - case FFR_REGS: - case PR_AND_FFR_REGS: -+ case FAKE_REGS: - return 1; - default: - return CEIL (lowest_size, UNITS_PER_WORD); -@@ -4232,6 +4503,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) - if (pr_or_ffr_regnum_p (regno)) - return false; - -+ /* These registers are abstract; their modes don't matter. */ -+ if (FAKE_REGNUM_P (regno)) -+ return true; -+ - if (regno == SP_REGNUM) - /* The purpose of comparing with ptr_mode is to support the - global register variable associated with the stack pointer -@@ -4352,12 +4627,34 @@ aarch64_fntype_pstate_sm (const_tree fntype) - return AARCH64_FL_SM_OFF; - } - -+/* Return state flags that describe whether and how functions of type -+ FNTYPE share state STATE_NAME with their callers. 
*/ -+ -+static unsigned int -+aarch64_fntype_shared_flags (const_tree fntype, const char *state_name) -+{ -+ return aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (fntype), -+ state_name); -+} -+ -+/* Return the state of PSTATE.ZA on entry to functions of type FNTYPE. */ -+ -+static aarch64_feature_flags -+aarch64_fntype_pstate_za (const_tree fntype) -+{ -+ if (aarch64_fntype_shared_flags (fntype, "za")) -+ return AARCH64_FL_ZA_ON; -+ -+ return 0; -+} -+ - /* Return the ISA mode on entry to functions of type FNTYPE. */ - - static aarch64_feature_flags - aarch64_fntype_isa_mode (const_tree fntype) - { -- return aarch64_fntype_pstate_sm (fntype); -+ return (aarch64_fntype_pstate_sm (fntype) -+ | aarch64_fntype_pstate_za (fntype)); - } - - /* Return the state of PSTATE.SM when compiling the body of -@@ -4370,13 +4667,37 @@ aarch64_fndecl_pstate_sm (const_tree fndecl) - return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); - } - -+/* Return true if function FNDECL has state STATE_NAME, either by creating -+ new state itself or by sharing state with callers. */ -+ -+static bool -+aarch64_fndecl_has_state (tree fndecl, const char *state_name) -+{ -+ return (aarch64_fndecl_has_new_state (fndecl, state_name) -+ || aarch64_fntype_shared_flags (TREE_TYPE (fndecl), -+ state_name) != 0); -+} -+ -+/* Return the state of PSTATE.ZA when compiling the body of function FNDECL. -+ This might be different from the state of PSTATE.ZA on entry. */ -+ -+static aarch64_feature_flags -+aarch64_fndecl_pstate_za (const_tree fndecl) -+{ -+ if (aarch64_fndecl_has_new_state (fndecl, "za")) -+ return AARCH64_FL_ZA_ON; -+ -+ return aarch64_fntype_pstate_za (TREE_TYPE (fndecl)); -+} -+ - /* Return the ISA mode that should be used to compile the body of - function FNDECL. 
*/ - - static aarch64_feature_flags - aarch64_fndecl_isa_mode (const_tree fndecl) - { -- return aarch64_fndecl_pstate_sm (fndecl); -+ return (aarch64_fndecl_pstate_sm (fndecl) -+ | aarch64_fndecl_pstate_za (fndecl)); - } - - /* Return the state of PSTATE.SM on entry to the current function. -@@ -4389,6 +4710,44 @@ aarch64_cfun_incoming_pstate_sm () - return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl)); - } - -+/* Return the state of PSTATE.ZA on entry to the current function. -+ This might be different from the state of PSTATE.ZA in the function -+ body. */ -+ -+static aarch64_feature_flags -+aarch64_cfun_incoming_pstate_za () -+{ -+ return aarch64_fntype_pstate_za (TREE_TYPE (cfun->decl)); -+} -+ -+/* Return state flags that describe whether and how the current function shares -+ state STATE_NAME with callers. */ -+ -+static unsigned int -+aarch64_cfun_shared_flags (const char *state_name) -+{ -+ return aarch64_fntype_shared_flags (TREE_TYPE (cfun->decl), state_name); -+} -+ -+/* Return true if the current function creates new state of type STATE_NAME -+ (as opposed to sharing the state with its callers or ignoring the state -+ altogether). */ -+ -+static bool -+aarch64_cfun_has_new_state (const char *state_name) -+{ -+ return aarch64_fndecl_has_new_state (cfun->decl, state_name); -+} -+ -+/* Return true if the current function has state STATE_NAME, either by -+ creating new state itself or by sharing state with callers. */ -+ -+static bool -+aarch64_cfun_has_state (const char *state_name) -+{ -+ return aarch64_fndecl_has_state (cfun->decl, state_name); -+} -+ - /* Return true if a call from the current function to a function with - ISA mode CALLEE_MODE would involve a change to PSTATE.SM around - the BL instruction. */ -@@ -5952,6 +6311,74 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) - factor, nelts_per_vq); - } - -+/* Return a constant that represents FACTOR multiplied by the -+ number of 128-bit quadwords in an SME vector. 
ISA_MODE is the -+ ISA mode in which the calculation is being performed. */ -+ -+static rtx -+aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor, -+ aarch64_feature_flags isa_mode) -+{ -+ gcc_assert (aarch64_sve_rdvl_factor_p (factor)); -+ if (isa_mode & AARCH64_FL_SM_ON) -+ /* We're in streaming mode, so we can use normal poly-int values. */ -+ return gen_int_mode ({ factor, factor }, mode); -+ -+ rtvec vec = gen_rtvec (1, gen_int_mode (factor, SImode)); -+ rtx unspec = gen_rtx_UNSPEC (mode, vec, UNSPEC_SME_VQ); -+ return gen_rtx_CONST (mode, unspec); -+} -+ -+/* Return true if X is a constant that represents some number X -+ multiplied by the number of quadwords in an SME vector. Store this X -+ in *FACTOR if so. */ -+ -+static bool -+aarch64_sme_vq_unspec_p (const_rtx x, HOST_WIDE_INT *factor) -+{ -+ if (!TARGET_SME || GET_CODE (x) != CONST) -+ return false; -+ -+ x = XEXP (x, 0); -+ if (GET_CODE (x) != UNSPEC -+ || XINT (x, 1) != UNSPEC_SME_VQ -+ || XVECLEN (x, 0) != 1) -+ return false; -+ -+ x = XVECEXP (x, 0, 0); -+ if (!CONST_INT_P (x)) -+ return false; -+ -+ *factor = INTVAL (x); -+ return true; -+} -+ -+/* Return true if X is a constant that represents some number Y -+ multiplied by the number of quadwords in an SME vector, and if -+ that Y is in the range of RDSVL. */ -+ -+bool -+aarch64_rdsvl_immediate_p (const_rtx x) -+{ -+ HOST_WIDE_INT factor; -+ return (aarch64_sme_vq_unspec_p (x, &factor) -+ && aarch64_sve_rdvl_factor_p (factor)); -+} -+ -+/* Return the asm string for an RDSVL instruction that calculates X, -+ which is a constant that satisfies aarch64_rdsvl_immediate_p. 
*/ -+ -+char * -+aarch64_output_rdsvl (const_rtx x) -+{ -+ gcc_assert (aarch64_rdsvl_immediate_p (x)); -+ static char buffer[sizeof ("rdsvl\t%x0, #-") + 3 * sizeof (int)]; -+ x = XVECEXP (XEXP (x, 0), 0, 0); -+ snprintf (buffer, sizeof (buffer), "rdsvl\t%%x0, #%d", -+ (int) INTVAL (x) / 16); -+ return buffer; -+} -+ - /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ - - static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = -@@ -7717,6 +8144,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - return; - } - -+ if (aarch64_rdsvl_immediate_p (base)) -+ { -+ /* We could handle non-constant offsets if they are ever -+ generated. */ -+ gcc_assert (const_offset == 0); -+ emit_insn (gen_rtx_SET (dest, imm)); -+ return; -+ } -+ - sty = aarch64_classify_symbol (base, const_offset); - switch (sty) - { -@@ -8732,8 +9168,10 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) - rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode, - pcum->pcs_variant); - rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum); -- return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie, -- sme_mode_switch_args)); -+ rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode); -+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie, -+ sme_mode_switch_args, -+ shared_za_flags)); - } - - aarch64_layout_arg (pcum_v, arg); -@@ -8744,7 +9182,7 @@ void - aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - const_tree fntype, - rtx libname ATTRIBUTE_UNUSED, -- const_tree fndecl ATTRIBUTE_UNUSED, -+ const_tree fndecl, - unsigned n_named ATTRIBUTE_UNUSED, - bool silent_p) - { -@@ -8769,6 +9207,8 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - pcum->aapcs_stack_words = 0; - pcum->aapcs_stack_size = 0; - pcum->silent_p = silent_p; -+ pcum->shared_za_flags -+ = (fntype ? 
aarch64_fntype_shared_flags (fntype, "za") : 0U); - pcum->num_sme_mode_switch_args = 0; - - if (!silent_p -@@ -10803,14 +11243,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - } - } - -+/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */ -+ -+void -+aarch64_extra_live_on_entry (bitmap regs) -+{ -+ if (TARGET_ZA) -+ { -+ bitmap_set_bit (regs, LOWERING_REGNUM); -+ bitmap_set_bit (regs, SME_STATE_REGNUM); -+ bitmap_set_bit (regs, TPIDR2_SETUP_REGNUM); -+ bitmap_set_bit (regs, ZA_FREE_REGNUM); -+ bitmap_set_bit (regs, ZA_SAVED_REGNUM); -+ -+ /* The only time ZA can't have live contents on entry is when -+ the function explicitly treats it as a pure output. */ -+ auto za_flags = aarch64_cfun_shared_flags ("za"); -+ if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT)) -+ bitmap_set_bit (regs, ZA_REGNUM); -+ } -+} -+ - /* Return 1 if the register is used by the epilogue. We need to say the - return register is used, but only after epilogue generation is complete. - Note that in the case of sibcalls, the values "used by the epilogue" are -- considered live at the start of the called function. -- -- For SIMD functions we need to return 1 for FP registers that are saved and -- restored by a function but are not zero in call_used_regs. If we do not do -- this optimizations may remove the restore of the register. */ -+ considered live at the start of the called function. */ - - int - aarch64_epilogue_uses (int regno) -@@ -10820,6 +11277,18 @@ aarch64_epilogue_uses (int regno) - if (regno == LR_REGNUM) - return 1; - } -+ if (regno == LOWERING_REGNUM && TARGET_ZA) -+ return 1; -+ if (regno == SME_STATE_REGNUM && TARGET_ZA) -+ return 1; -+ if (regno == TPIDR2_SETUP_REGNUM && TARGET_ZA) -+ return 1; -+ /* If the function shares SME state with its caller, ensure that that -+ data is not in the lazy save buffer on exit. 
*/ -+ if (regno == ZA_SAVED_REGNUM && aarch64_cfun_incoming_pstate_za () != 0) -+ return 1; -+ if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0) -+ return 1; - return 0; - } - -@@ -11501,8 +11970,10 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) - - /* There's no way to calculate VL-based values using relocations. */ - subrtx_iterator::array_type array; -+ HOST_WIDE_INT factor; - FOR_EACH_SUBRTX (iter, array, x, ALL) -- if (GET_CODE (*iter) == CONST_POLY_INT) -+ if (GET_CODE (*iter) == CONST_POLY_INT -+ || aarch64_sme_vq_unspec_p (x, &factor)) - return true; - - poly_int64 offset; -@@ -12364,6 +12835,72 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) - return true; - } - -+/* Return a fresh memory reference to the current function's TPIDR2 block, -+ creating a block if necessary. */ -+ -+static rtx -+aarch64_get_tpidr2_block () -+{ -+ if (!cfun->machine->tpidr2_block) -+ /* The TPIDR2 block is 16 bytes in size and must be aligned to a 128-bit -+ boundary. */ -+ cfun->machine->tpidr2_block = assign_stack_local (V16QImode, 16, 128); -+ return copy_rtx (cfun->machine->tpidr2_block); -+} -+ -+/* Return a fresh register that points to the current function's -+ TPIDR2 block, creating a block if necessary. */ -+ -+static rtx -+aarch64_get_tpidr2_ptr () -+{ -+ rtx block = aarch64_get_tpidr2_block (); -+ return force_reg (Pmode, XEXP (block, 0)); -+} -+ -+/* Emit instructions to allocate a ZA lazy save buffer and initialize the -+ current function's TPIDR2 block. */ -+ -+static void -+aarch64_init_tpidr2_block () -+{ -+ rtx block = aarch64_get_tpidr2_block (); -+ -+ /* The ZA save buffer is SVL.B*SVL.B bytes in size. 
*/ -+ rtx svl_bytes = aarch64_sme_vq_immediate (Pmode, 16, AARCH64_ISA_MODE); -+ rtx svl_bytes_reg = force_reg (DImode, svl_bytes); -+ rtx za_size = expand_simple_binop (Pmode, MULT, svl_bytes_reg, -+ svl_bytes_reg, NULL, 0, OPTAB_LIB_WIDEN); -+ rtx za_save_buffer = allocate_dynamic_stack_space (za_size, 128, -+ BITS_PER_UNIT, -1, true); -+ za_save_buffer = force_reg (Pmode, za_save_buffer); -+ cfun->machine->za_save_buffer = za_save_buffer; -+ -+ /* The first word of the block points to the save buffer and the second -+ word is the number of ZA slices to save. */ -+ rtx block_0 = adjust_address (block, DImode, 0); -+ rtx block_8 = adjust_address (block, DImode, 8); -+ emit_insn (gen_store_pair_dw_didi (block_0, za_save_buffer, -+ block_8, svl_bytes_reg)); -+ -+ if (!memory_operand (block, V16QImode)) -+ block = replace_equiv_address (block, force_reg (Pmode, XEXP (block, 0))); -+ emit_insn (gen_aarch64_setup_local_tpidr2 (block)); -+} -+ -+/* Restore the contents of ZA from the lazy save buffer, given that -+ register TPIDR2_BLOCK points to the current function's TPIDR2 block. -+ PSTATE.ZA is known to be 0 and TPIDR2_EL0 is known to be null. */ -+ -+void -+aarch64_restore_za (rtx tpidr2_block) -+{ -+ emit_insn (gen_aarch64_smstart_za ()); -+ if (REGNO (tpidr2_block) != R0_REGNUM) -+ emit_move_insn (gen_rtx_REG (Pmode, R0_REGNUM), tpidr2_block); -+ emit_insn (gen_aarch64_tpidr2_restore ()); -+} -+ - /* Implement TARGET_START_CALL_ARGS. 
*/ - - static void -@@ -12379,6 +12916,20 @@ aarch64_start_call_args (cumulative_args_t ca_v) - " option %<-march%>, or by using the %" - " attribute or pragma", "sme"); - } -+ -+ if ((ca->shared_za_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT)) -+ && !aarch64_cfun_has_state ("za")) -+ error ("call to a function that shares %qs state from a function" -+ " that has no %qs state", "za", "za"); -+ else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON)) -+ error ("call to a function that shares SME state from a function" -+ " that has no SME state"); -+ -+ /* If this is a call to a private ZA function, emit a marker to -+ indicate where any necessary set-up code could be inserted. -+ The code itself is inserted by the mode-switching pass. */ -+ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON)) -+ emit_insn (gen_aarch64_start_private_za_call ()); - } - - /* This function is used by the call expanders of the machine description. -@@ -12391,6 +12942,8 @@ aarch64_start_call_args (cumulative_args_t ca_v) - The second element is a PARALLEL that lists all the argument - registers that need to be saved and restored around a change - in PSTATE.SM, or const0_rtx if no such switch is needed. -+ The third element is a const_int that contains the sharing flags -+ for ZA. - SIBCALL indicates whether this function call is normal call or sibling call. - It will generate different pattern accordingly. 
*/ - -@@ -12403,10 +12956,12 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) - - rtx callee_abi = cookie; - rtx sme_mode_switch_args = const0_rtx; -+ unsigned int shared_za_flags = 0; - if (GET_CODE (cookie) == PARALLEL) - { - callee_abi = XVECEXP (cookie, 0, 0); - sme_mode_switch_args = XVECEXP (cookie, 0, 1); -+ shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2)); - } - - gcc_assert (CONST_INT_P (callee_abi)); -@@ -12426,6 +12981,41 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) - : !REG_P (callee)) - XEXP (mem, 0) = force_reg (mode, callee); - -+ /* Accumulate the return values, including state that is shared via -+ attributes. */ -+ auto_vec return_values; -+ if (result) -+ { -+ if (GET_CODE (result) == PARALLEL) -+ for (int i = 0; i < XVECLEN (result, 0); ++i) -+ return_values.safe_push (XVECEXP (result, 0, i)); -+ else -+ return_values.safe_push (result); -+ } -+ unsigned int orig_num_return_values = return_values.length (); -+ if (shared_za_flags & AARCH64_STATE_OUT) -+ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_REGNUM)); -+ /* When calling private-ZA functions from functions with ZA state, -+ we want to know whether the call committed a lazy save. */ -+ if (TARGET_ZA && !shared_za_flags) -+ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM)); -+ -+ /* Create the new return value, if necessary. 
*/ -+ if (orig_num_return_values != return_values.length ()) -+ { -+ if (return_values.length () == 1) -+ result = return_values[0]; -+ else -+ { -+ for (rtx &x : return_values) -+ if (GET_CODE (x) != EXPR_LIST) -+ x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx); -+ rtvec v = gen_rtvec_v (return_values.length (), -+ return_values.address ()); -+ result = gen_rtx_PARALLEL (VOIDmode, v); -+ } -+ } -+ - call = gen_rtx_CALL (VOIDmode, mem, const0_rtx); - - if (result != NULL_RTX) -@@ -12492,6 +13082,50 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall) - - cfun->machine->call_switches_pstate_sm = true; - } -+ -+ /* Add any ZA-related information. -+ ZA_REGNUM represents the current function's ZA state, rather than -+ the contents of the ZA register itself. We ensure that the function's -+ ZA state is preserved by private-ZA call sequences, so the call itself -+ does not use or clobber ZA_REGNUM. */ -+ if (TARGET_ZA) -+ { -+ /* The callee requires ZA to be active if the callee is shared-ZA, -+ otherwise it requires ZA to be dormant or off. The state of ZA is -+ captured by a combination of SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, -+ and ZA_SAVED_REGNUM. */ -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (DImode, SME_STATE_REGNUM)); -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (DImode, TPIDR2_SETUP_REGNUM)); -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM)); -+ -+ /* Keep the aarch64_start/end_private_za_call markers live. */ -+ if (!(callee_isa_mode & AARCH64_FL_ZA_ON)) -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (VNx16BImode, LOWERING_REGNUM)); -+ -+ /* If the callee is a shared-ZA function, record whether it uses the -+ current value of ZA. */ -+ if (shared_za_flags & AARCH64_STATE_IN) -+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), -+ gen_rtx_REG (VNx16BImode, ZA_REGNUM)); -+ } -+} -+ -+/* Implement TARGET_END_CALL_ARGS. 
*/ -+ -+static void -+aarch64_end_call_args (cumulative_args_t ca_v) -+{ -+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); -+ -+ /* If this is a call to a private ZA function, emit a marker to -+ indicate where any necessary restoration code could be inserted. -+ The code itself is inserted by the mode-switching pass. */ -+ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON)) -+ emit_insn (gen_aarch64_end_private_za_call ()); - } - - /* Emit call insn with PAT and do aarch64-specific handling. */ -@@ -13602,6 +14236,9 @@ aarch64_regno_regclass (unsigned regno) - if (regno == FFR_REGNUM || regno == FFRT_REGNUM) - return FFR_REGS; - -+ if (FAKE_REGNUM_P (regno)) -+ return FAKE_REGS; -+ - return NO_REGS; - } - -@@ -13957,12 +14594,14 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) - return (vec_flags & VEC_ADVSIMD - ? CEIL (lowest_size, UNITS_PER_VREG) - : CEIL (lowest_size, UNITS_PER_WORD)); -+ - case STACK_REG: - case PR_REGS: - case PR_LO_REGS: - case PR_HI_REGS: - case FFR_REGS: - case PR_AND_FFR_REGS: -+ case FAKE_REGS: - return 1; - - case NO_REGS: -@@ -19002,10 +19641,14 @@ aarch64_override_options_internal (struct gcc_options *opts) - && !fixed_regs[R18_REGNUM]) - error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); - -- if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) -+ if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON)) - && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME)) - { -- error ("streaming functions require the ISA extension %qs", "sme"); -+ if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON) -+ error ("streaming functions require the ISA extension %qs", "sme"); -+ else -+ error ("functions with SME state require the ISA extension %qs", -+ "sme"); - inform (input_location, "you can enable %qs using the command-line" - " option %<-march%>, or by using the %" - " attribute or pragma", "sme"); -@@ -21341,6 +21984,8 @@ aarch64_conditional_register_usage (void) - CLEAR_HARD_REG_BIT 
(operand_reg_set, VG_REGNUM); - CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); - CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); -+ for (int i = FIRST_FAKE_REGNUM; i <= LAST_FAKE_REGNUM; ++i) -+ CLEAR_HARD_REG_BIT (operand_reg_set, i); - - /* When tracking speculation, we need a couple of call-clobbered registers - to track the speculation state. It would be nice to just use -@@ -22795,6 +23440,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) - || aarch64_sve_rdvl_immediate_p (x))) - return true; - -+ if (aarch64_rdsvl_immediate_p (x)) -+ return true; -+ - return aarch64_classify_symbolic_expression (x) - == SYMBOL_TINY_ABSOLUTE; - } -@@ -28266,9 +28914,45 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2) - return 0; - if (!check_attr ("arm", "streaming_compatible")) - return 0; -+ if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za") -+ != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za")) -+ return 0; - return 1; - } - -+/* Implement TARGET_MERGE_DECL_ATTRIBUTES. 
*/ -+ -+static tree -+aarch64_merge_decl_attributes (tree olddecl, tree newdecl) -+{ -+ tree old_attrs = DECL_ATTRIBUTES (olddecl); -+ tree old_new = lookup_attribute ("arm", "new", old_attrs); -+ -+ tree new_attrs = DECL_ATTRIBUTES (newdecl); -+ tree new_new = lookup_attribute ("arm", "new", new_attrs); -+ -+ if (DECL_INITIAL (olddecl) && new_new) -+ { -+ error ("cannot apply attribute %qs to %q+D after the function" -+ " has been defined", "new", newdecl); -+ inform (DECL_SOURCE_LOCATION (olddecl), "%q+D defined here", -+ newdecl); -+ } -+ else -+ { -+ if (old_new && new_new) -+ { -+ old_attrs = remove_attribute ("arm", "new", old_attrs); -+ TREE_VALUE (new_new) = chainon (TREE_VALUE (new_new), -+ TREE_VALUE (old_new)); -+ } -+ if (new_new) -+ aarch64_check_arm_new_against_type (TREE_VALUE (new_new), newdecl); -+ } -+ -+ return merge_attributes (old_attrs, new_attrs); -+} -+ - /* Implement TARGET_GET_MULTILIB_ABI_NAME */ - - static const char * -@@ -28634,6 +29318,629 @@ aarch64_indirect_call_asm (rtx addr) - return ""; - } - -+/* Implement OPTIMIZE_MODE_SWITCHING. */ -+ -+bool -+aarch64_optimize_mode_switching (aarch64_mode_entity entity) -+{ -+ bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0 -+ || (aarch64_cfun_has_new_state ("za") -+ && df_regs_ever_live_p (ZA_REGNUM))); -+ -+ if (have_sme_state && nonlocal_goto_handler_labels) -+ { -+ static bool reported; -+ if (!reported) -+ { -+ sorry ("non-local gotos in functions with SME state"); -+ reported = true; -+ } -+ } -+ -+ switch (entity) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return have_sme_state && !nonlocal_goto_handler_labels; -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_EMIT for ZA_SAVE_BUFFER. 
*/ -+ -+static void -+aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode mode, -+ aarch64_tristate_mode prev_mode) -+{ -+ if (mode == aarch64_tristate_mode::YES) -+ { -+ gcc_assert (prev_mode == aarch64_tristate_mode::NO); -+ aarch64_init_tpidr2_block (); -+ } -+ else -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_EMIT for LOCAL_SME_STATE. */ -+ -+static void -+aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode, -+ aarch64_local_sme_state prev_mode) -+{ -+ /* Back-propagation should ensure that we're always starting from -+ a known mode. */ -+ gcc_assert (prev_mode != aarch64_local_sme_state::ANY); -+ -+ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) -+ { -+ /* Commit any uncommitted lazy save. This leaves ZA either active -+ and zero (lazy save case) or off (normal case). -+ -+ The sequence is: -+ -+ mrs , tpidr2_el0 -+ cbz , no_save -+ bl __arm_tpidr2_save -+ msr tpidr2_el0, xzr -+ zero { za } // Only if ZA is live -+ no_save: */ -+ bool is_active = (mode == aarch64_local_sme_state::ACTIVE_LIVE -+ || mode == aarch64_local_sme_state::ACTIVE_DEAD); -+ auto tmp_reg = gen_reg_rtx (DImode); -+ auto active_flag = gen_int_mode (is_active, DImode); -+ emit_insn (gen_aarch64_read_tpidr2 (tmp_reg)); -+ emit_insn (gen_aarch64_commit_lazy_save (tmp_reg, active_flag)); -+ } -+ -+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE -+ || mode == aarch64_local_sme_state::ACTIVE_DEAD) -+ { -+ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL) -+ { -+ /* Make ZA active after being inactive. -+ -+ First handle the case in which the lazy save we set up was -+ committed by a callee. If the function's source-level ZA state -+ is live then we must conditionally restore it from the lazy -+ save buffer. Otherwise we can just force PSTATE.ZA to 1. 
*/ -+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE) -+ emit_insn (gen_aarch64_restore_za (aarch64_get_tpidr2_ptr ())); -+ else -+ emit_insn (gen_aarch64_smstart_za ()); -+ -+ /* Now handle the case in which the lazy save was not committed. -+ In that case, ZA still contains the current function's ZA state, -+ and we just need to cancel the lazy save. */ -+ emit_insn (gen_aarch64_clear_tpidr2 ()); -+ return; -+ } -+ -+ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL) -+ { -+ /* Retrieve the current function's ZA state from the lazy save -+ buffer. */ -+ aarch64_restore_za (aarch64_get_tpidr2_ptr ()); -+ return; -+ } -+ -+ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER -+ || prev_mode == aarch64_local_sme_state::OFF) -+ { -+ /* INACTIVE_CALLER means that we are enabling ZA for the first -+ time in this function. The code above means that ZA is either -+ active and zero (if we committed a lazy save) or off. Handle -+ the latter case by forcing ZA on. -+ -+ OFF means that PSTATE.ZA is guaranteed to be 0. We just need -+ to force it to 1. -+ -+ Both cases leave ZA zeroed. */ -+ emit_insn (gen_aarch64_smstart_za ()); -+ return; -+ } -+ -+ if (prev_mode == aarch64_local_sme_state::ACTIVE_DEAD -+ || prev_mode == aarch64_local_sme_state::ACTIVE_LIVE) -+ /* A simple change in liveness, such as in a CFG structure where -+ ZA is only conditionally defined. No code is needed. */ -+ return; -+ -+ gcc_unreachable (); -+ } -+ -+ if (mode == aarch64_local_sme_state::INACTIVE_LOCAL) -+ { -+ if (prev_mode == aarch64_local_sme_state::ACTIVE_LIVE -+ || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD -+ || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER) -+ { -+ /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual -+ case of setting up a lazy save buffer before a call. -+ A transition from INACTIVE_CALLER is similar, except that -+ the contents of ZA are known to be zero. 
-+ -+ A transition from ACTIVE_DEAD means that ZA is live at the -+ point of the transition, but is dead on at least one incoming -+ edge. (That is, ZA is only conditionally initialized.) -+ For efficiency, we want to set up a lazy save even for -+ dead contents, since forcing ZA off would make later code -+ restore ZA from the lazy save buffer. */ -+ emit_insn (gen_aarch64_write_tpidr2 (aarch64_get_tpidr2_ptr ())); -+ return; -+ } -+ -+ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL -+ || prev_mode == aarch64_local_sme_state::OFF) -+ /* We're simply discarding the information about which inactive -+ state applies. */ -+ return; -+ -+ gcc_unreachable (); -+ } -+ -+ if (mode == aarch64_local_sme_state::INACTIVE_CALLER -+ || mode == aarch64_local_sme_state::OFF) -+ { -+ /* The transition to INACTIVE_CALLER is used before returning from -+ new("za") functions. Any state in ZA belongs to the current -+ function rather than a caller, but that state is no longer -+ needed. Clear any pending lazy save and turn ZA off. -+ -+ The transition to OFF is used before calling a private-ZA function. -+ We committed any incoming lazy save above, so at this point any -+ contents in ZA belong to the current function. */ -+ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL) -+ emit_insn (gen_aarch64_clear_tpidr2 ()); -+ -+ if (prev_mode != aarch64_local_sme_state::OFF -+ && prev_mode != aarch64_local_sme_state::SAVED_LOCAL) -+ emit_insn (gen_aarch64_smstop_za ()); -+ -+ return; -+ } -+ -+ if (mode == aarch64_local_sme_state::SAVED_LOCAL) -+ { -+ /* This is a transition to an exception handler. */ -+ gcc_assert (prev_mode == aarch64_local_sme_state::OFF -+ || prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL); -+ return; -+ } -+ -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_EMIT. 
*/ -+ -+static void -+aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live) -+{ -+ if (mode == prev_mode) -+ return; -+ -+ start_sequence (); -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode (mode), -+ aarch64_tristate_mode (prev_mode)); -+ break; -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state (mode), -+ aarch64_local_sme_state (prev_mode)); -+ break; -+ } -+ rtx_insn *seq = get_insns (); -+ end_sequence (); -+ -+ /* Get the set of clobbered registers that are currently live. */ -+ HARD_REG_SET clobbers = {}; -+ for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) -+ { -+ vec_rtx_properties properties; -+ properties.add_insn (insn, false); -+ for (rtx_obj_reference ref : properties.refs ()) -+ if (ref.is_write () && HARD_REGISTER_NUM_P (ref.regno)) -+ SET_HARD_REG_BIT (clobbers, ref.regno); -+ } -+ clobbers &= live; -+ -+ /* Emit instructions to save clobbered registers to pseudos. Queue -+ instructions to restore the registers afterwards. -+ -+ This should only needed in rare situations. */ -+ auto_vec after; -+ for (unsigned int regno = R0_REGNUM; regno < R30_REGNUM; ++regno) -+ if (TEST_HARD_REG_BIT (clobbers, regno)) -+ { -+ rtx hard_reg = gen_rtx_REG (DImode, regno); -+ rtx pseudo_reg = gen_reg_rtx (DImode); -+ emit_move_insn (pseudo_reg, hard_reg); -+ after.quick_push (gen_move_insn (hard_reg, pseudo_reg)); -+ } -+ if (TEST_HARD_REG_BIT (clobbers, CC_REGNUM)) -+ { -+ rtx pseudo_reg = gen_reg_rtx (DImode); -+ emit_insn (gen_aarch64_save_nzcv (pseudo_reg)); -+ after.quick_push (gen_aarch64_restore_nzcv (pseudo_reg)); -+ } -+ -+ /* Emit the transition instructions themselves. */ -+ emit_insn (seq); -+ -+ /* Restore the clobbered registers. 
*/ -+ for (auto *insn : after) -+ emit_insn (insn); -+} -+ -+/* Return true if INSN references the SME state represented by hard register -+ REGNO. */ -+ -+static bool -+aarch64_insn_references_sme_state_p (rtx_insn *insn, unsigned int regno) -+{ -+ df_ref ref; -+ FOR_EACH_INSN_DEF (ref, insn) -+ if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) -+ && DF_REF_REGNO (ref) == regno) -+ return true; -+ FOR_EACH_INSN_USE (ref, insn) -+ if (DF_REF_REGNO (ref) == regno) -+ return true; -+ return false; -+} -+ -+/* Implement TARGET_MODE_NEEDED for LOCAL_SME_STATE. */ -+ -+static aarch64_local_sme_state -+aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live) -+{ -+ if (!CALL_P (insn) -+ && find_reg_note (insn, REG_EH_REGION, NULL_RTX)) -+ { -+ static bool reported; -+ if (!reported) -+ { -+ sorry ("catching non-call exceptions in functions with SME state"); -+ reported = true; -+ } -+ /* Aim for graceful error recovery by picking the value that is -+ least likely to generate an ICE. */ -+ return aarch64_local_sme_state::INACTIVE_LOCAL; -+ } -+ -+ /* A non-local goto is equivalent to a return. We disallow non-local -+ receivers in functions with SME state, so we know that the target -+ expects ZA to be dormant or off. */ -+ if (JUMP_P (insn) -+ && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) -+ return aarch64_local_sme_state::INACTIVE_CALLER; -+ -+ /* start_private_za_call and end_private_za_call bracket a sequence -+ that calls a private-ZA function. Force ZA to be turned off if the -+ function doesn't have any live ZA state, otherwise require ZA to be -+ inactive. */ -+ auto icode = recog_memoized (insn); -+ if (icode == CODE_FOR_aarch64_start_private_za_call -+ || icode == CODE_FOR_aarch64_end_private_za_call) -+ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) -+ ? aarch64_local_sme_state::INACTIVE_LOCAL -+ : aarch64_local_sme_state::OFF); -+ -+ /* Force ZA to contain the current function's ZA state if INSN wants -+ to access it. 
*/ -+ if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM)) -+ return (TEST_HARD_REG_BIT (live, ZA_REGNUM) -+ ? aarch64_local_sme_state::ACTIVE_LIVE -+ : aarch64_local_sme_state::ACTIVE_DEAD); -+ -+ return aarch64_local_sme_state::ANY; -+} -+ -+/* Implement TARGET_MODE_NEEDED for ZA_SAVE_BUFFER. */ -+ -+static aarch64_tristate_mode -+aarch64_mode_needed_za_save_buffer (rtx_insn *insn, HARD_REG_SET live) -+{ -+ /* We need to set up a lazy save buffer no later than the first -+ transition to INACTIVE_LOCAL (which involves setting up a lazy save). */ -+ if (aarch64_mode_needed_local_sme_state (insn, live) -+ == aarch64_local_sme_state::INACTIVE_LOCAL) -+ return aarch64_tristate_mode::YES; -+ -+ /* Also make sure that the lazy save buffer is set up before the first -+ insn that throws internally. The exception handler will sometimes -+ load from it. */ -+ if (find_reg_note (insn, REG_EH_REGION, NULL_RTX)) -+ return aarch64_tristate_mode::YES; -+ -+ return aarch64_tristate_mode::MAYBE; -+} -+ -+/* Implement TARGET_MODE_NEEDED. */ -+ -+static int -+aarch64_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET live) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return int (aarch64_mode_needed_za_save_buffer (insn, live)); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_mode_needed_local_sme_state (insn, live)); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_AFTER for LOCAL_SME_STATE. */ -+ -+static aarch64_local_sme_state -+aarch64_mode_after_local_sme_state (aarch64_local_sme_state mode, -+ HARD_REG_SET live) -+{ -+ /* Note places where ZA dies, so that we can try to avoid saving and -+ restoring state that isn't needed. */ -+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE -+ && !TEST_HARD_REG_BIT (live, ZA_REGNUM)) -+ return aarch64_local_sme_state::ACTIVE_DEAD; -+ -+ /* Note where ZA is born, e.g. when moving past an __arm_out("za") -+ function. 
*/ -+ if (mode == aarch64_local_sme_state::ACTIVE_DEAD -+ && TEST_HARD_REG_BIT (live, ZA_REGNUM)) -+ return aarch64_local_sme_state::ACTIVE_LIVE; -+ -+ return mode; -+} -+ -+/* Implement TARGET_MODE_AFTER. */ -+ -+static int -+aarch64_mode_after (int entity, int mode, rtx_insn *, HARD_REG_SET live) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return mode; -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_mode_after_local_sme_state -+ (aarch64_local_sme_state (mode), live)); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_CONFLUENCE for LOCAL_SME_STATE. */ -+ -+static aarch64_local_sme_state -+aarch64_local_sme_confluence (aarch64_local_sme_state mode1, -+ aarch64_local_sme_state mode2) -+{ -+ /* Perform a symmetrical check for two values. */ -+ auto is_pair = [&](aarch64_local_sme_state val1, -+ aarch64_local_sme_state val2) -+ { -+ return ((mode1 == val1 && mode2 == val2) -+ || (mode1 == val2 && mode2 == val1)); -+ }; -+ -+ /* INACTIVE_CALLER means ZA is off or it has dormant contents belonging -+ to a caller. OFF is one of the options. */ -+ if (is_pair (aarch64_local_sme_state::INACTIVE_CALLER, -+ aarch64_local_sme_state::OFF)) -+ return aarch64_local_sme_state::INACTIVE_CALLER; -+ -+ /* Similarly for dormant contents belonging to the current function. */ -+ if (is_pair (aarch64_local_sme_state::INACTIVE_LOCAL, -+ aarch64_local_sme_state::OFF)) -+ return aarch64_local_sme_state::INACTIVE_LOCAL; -+ -+ /* Treat a conditionally-initialized value as a fully-initialized value. */ -+ if (is_pair (aarch64_local_sme_state::ACTIVE_LIVE, -+ aarch64_local_sme_state::ACTIVE_DEAD)) -+ return aarch64_local_sme_state::ACTIVE_LIVE; -+ -+ return aarch64_local_sme_state::ANY; -+} -+ -+/* Implement TARGET_MODE_CONFLUENCE. 
*/ -+ -+static int -+aarch64_mode_confluence (int entity, int mode1, int mode2) -+{ -+ gcc_assert (mode1 != mode2); -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return int (aarch64_tristate_mode::MAYBE); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_local_sme_confluence -+ (aarch64_local_sme_state (mode1), -+ aarch64_local_sme_state (mode2))); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_BACKPROP for an entity that either stays -+ NO throughput, or makes one transition from NO to YES. */ -+ -+static aarch64_tristate_mode -+aarch64_one_shot_backprop (aarch64_tristate_mode mode1, -+ aarch64_tristate_mode mode2) -+{ -+ /* Keep bringing the transition forward until it starts from NO. */ -+ if (mode1 == aarch64_tristate_mode::MAYBE -+ && mode2 == aarch64_tristate_mode::YES) -+ return mode2; -+ -+ return aarch64_tristate_mode::MAYBE; -+} -+ -+/* Implement TARGET_MODE_BACKPROP for LOCAL_SME_STATE. */ -+ -+static aarch64_local_sme_state -+aarch64_local_sme_backprop (aarch64_local_sme_state mode1, -+ aarch64_local_sme_state mode2) -+{ -+ /* We always need to know what the current state is when transitioning -+ to a new state. Force any location with indeterminate starting state -+ to be active. */ -+ if (mode1 == aarch64_local_sme_state::ANY) -+ switch (mode2) -+ { -+ case aarch64_local_sme_state::INACTIVE_CALLER: -+ case aarch64_local_sme_state::OFF: -+ case aarch64_local_sme_state::ACTIVE_DEAD: -+ /* The current function's ZA state is not live. */ -+ return aarch64_local_sme_state::ACTIVE_DEAD; -+ -+ case aarch64_local_sme_state::INACTIVE_LOCAL: -+ case aarch64_local_sme_state::ACTIVE_LIVE: -+ /* The current function's ZA state is live. */ -+ return aarch64_local_sme_state::ACTIVE_LIVE; -+ -+ case aarch64_local_sme_state::SAVED_LOCAL: -+ /* This is a transition to an exception handler. 
Since we don't -+ support non-call exceptions for SME functions, the source of -+ the transition must be known. We'll assert later if that's -+ not the case. */ -+ return aarch64_local_sme_state::ANY; -+ -+ case aarch64_local_sme_state::ANY: -+ return aarch64_local_sme_state::ANY; -+ } -+ -+ return aarch64_local_sme_state::ANY; -+} -+ -+/* Implement TARGET_MODE_BACKPROP. */ -+ -+static int -+aarch64_mode_backprop (int entity, int mode1, int mode2) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return int (aarch64_one_shot_backprop (aarch64_tristate_mode (mode1), -+ aarch64_tristate_mode (mode2))); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_local_sme_backprop -+ (aarch64_local_sme_state (mode1), -+ aarch64_local_sme_state (mode2))); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_ENTRY. */ -+ -+static int -+aarch64_mode_entry (int entity) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return int (aarch64_tristate_mode::NO); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_cfun_shared_flags ("za") != 0 -+ ? aarch64_local_sme_state::ACTIVE_LIVE -+ : aarch64_local_sme_state::INACTIVE_CALLER); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_EXIT. */ -+ -+static int -+aarch64_mode_exit (int entity) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ return int (aarch64_tristate_mode::MAYBE); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_cfun_shared_flags ("za") != 0 -+ ? aarch64_local_sme_state::ACTIVE_LIVE -+ : aarch64_local_sme_state::INACTIVE_CALLER); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_EH_HANDLER. 
*/ -+ -+static int -+aarch64_mode_eh_handler (int entity) -+{ -+ switch (aarch64_mode_entity (entity)) -+ { -+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER: -+ /* Require a lazy save buffer to be allocated before the first -+ insn that can throw. */ -+ return int (aarch64_tristate_mode::YES); -+ -+ case aarch64_mode_entity::LOCAL_SME_STATE: -+ return int (aarch64_local_sme_state::SAVED_LOCAL); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_MODE_PRIORITY. */ -+ -+static int -+aarch64_mode_priority (int, int n) -+{ -+ return n; -+} -+ -+/* Implement TARGET_MD_ASM_ADJUST. */ -+ -+static rtx_insn * -+aarch64_md_asm_adjust (vec &outputs, vec &inputs, -+ vec &input_modes, -+ vec &constraints, -+ vec &uses, vec &clobbers, -+ HARD_REG_SET &clobbered_regs, location_t loc) -+{ -+ rtx_insn *seq = arm_md_asm_adjust (outputs, inputs, input_modes, constraints, -+ uses, clobbers, clobbered_regs, loc); -+ -+ /* "za" in the clobber list of a function with ZA state is defined to -+ mean that the asm can read from and write to ZA. We can model the -+ read using a USE, but unfortunately, it's not possible to model the -+ write directly. Use a separate insn to model the effect. -+ -+ We must ensure that ZA is active on entry, which is enforced by using -+ SME_STATE_REGNUM. The asm must ensure that ZA is active on return. 
*/ -+ if (TARGET_ZA) -+ for (unsigned int i = clobbers.length (); i-- > 0; ) -+ { -+ rtx x = clobbers[i]; -+ if (REG_P (x) && REGNO (x) == ZA_REGNUM) -+ { -+ auto id = cfun->machine->next_asm_update_za_id++; -+ -+ start_sequence (); -+ if (seq) -+ emit_insn (seq); -+ emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode))); -+ seq = get_insns (); -+ end_sequence (); -+ -+ uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM)); -+ uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM)); -+ -+ clobbers.ordered_remove (i); -+ CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM); -+ } -+ } -+ return seq; -+} -+ - /* If CALL involves a change in PSTATE.SM, emit the instructions needed - to switch to the new mode and the instructions needed to restore the - original mode. Return true if something changed. */ -@@ -29108,6 +30415,9 @@ aarch64_get_v16qi_mode () - #undef TARGET_START_CALL_ARGS - #define TARGET_START_CALL_ARGS aarch64_start_call_args - -+#undef TARGET_END_CALL_ARGS -+#define TARGET_END_CALL_ARGS aarch64_end_call_args -+ - #undef TARGET_GIMPLE_FOLD_BUILTIN - #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin - -@@ -29473,6 +30783,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_COMP_TYPE_ATTRIBUTES - #define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes - -+#undef TARGET_MERGE_DECL_ATTRIBUTES -+#define TARGET_MERGE_DECL_ATTRIBUTES aarch64_merge_decl_attributes -+ - #undef TARGET_GET_MULTILIB_ABI_NAME - #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name - -@@ -29493,8 +30806,35 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_STRICT_ARGUMENT_NAMING - #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true - -+#undef TARGET_MODE_EMIT -+#define TARGET_MODE_EMIT aarch64_mode_emit -+ -+#undef TARGET_MODE_NEEDED -+#define TARGET_MODE_NEEDED aarch64_mode_needed -+ -+#undef TARGET_MODE_AFTER -+#define TARGET_MODE_AFTER aarch64_mode_after -+ -+#undef TARGET_MODE_CONFLUENCE -+#define 
TARGET_MODE_CONFLUENCE aarch64_mode_confluence -+ -+#undef TARGET_MODE_BACKPROP -+#define TARGET_MODE_BACKPROP aarch64_mode_backprop -+ -+#undef TARGET_MODE_ENTRY -+#define TARGET_MODE_ENTRY aarch64_mode_entry -+ -+#undef TARGET_MODE_EXIT -+#define TARGET_MODE_EXIT aarch64_mode_exit -+ -+#undef TARGET_MODE_EH_HANDLER -+#define TARGET_MODE_EH_HANDLER aarch64_mode_eh_handler -+ -+#undef TARGET_MODE_PRIORITY -+#define TARGET_MODE_PRIORITY aarch64_mode_priority -+ - #undef TARGET_MD_ASM_ADJUST --#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust -+#define TARGET_MD_ASM_ADJUST aarch64_md_asm_adjust - - #undef TARGET_ASM_FILE_END - #define TARGET_ASM_FILE_END aarch64_asm_file_end -@@ -29505,6 +30845,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_HAVE_SHADOW_CALL_STACK - #define TARGET_HAVE_SHADOW_CALL_STACK true - -+#undef TARGET_EXTRA_LIVE_ON_ENTRY -+#define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry -+ - #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL - #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue - -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 6bfe55968..89d30b9bf 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - /* Macros to test ISA flags. */ - - #define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) -+#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON) - #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) - #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) - #define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO) -@@ -259,6 +260,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define TARGET_STREAMING_COMPATIBLE \ - ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) - -+/* PSTATE.ZA is enabled in the current function body. 
*/ -+#define TARGET_ZA (AARCH64_ISA_ZA_ON) -+ - /* Crypto is an optional extension to AdvSIMD. */ - #define TARGET_CRYPTO (AARCH64_ISA_CRYPTO) - -@@ -445,7 +449,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ - 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ - 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ -- 1, 1 /* FFR and FFRT */ \ -+ 1, 1, /* FFR and FFRT */ \ -+ 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \ - } - - /* X30 is marked as caller-saved which is in line with regular function call -@@ -455,7 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - true but not until function epilogues have been generated. This ensures - that X30 is available for use in leaf functions if needed. */ - --#define CALL_USED_REGISTERS \ -+#define CALL_REALLY_USED_REGISTERS \ - { \ - 1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \ -@@ -468,7 +473,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - 1, 1, 1, 0, /* SFP, AP, CC, VG */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ -- 1, 1 /* FFR and FFRT */ \ -+ 1, 1, /* FFR and FFRT */ \ -+ 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \ - } - - #define REGISTER_NAMES \ -@@ -484,7 +490,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - "sfp", "ap", "cc", "vg", \ - "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ -- "ffr", "ffrt" \ -+ "ffr", "ffrt", \ -+ "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \ -+ "za_free", "za_saved", "za" \ - } - - /* Generate the register aliases for core register N */ -@@ -533,7 +541,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define FRAME_POINTER_REGNUM SFP_REGNUM - #define STACK_POINTER_REGNUM SP_REGNUM - #define ARG_POINTER_REGNUM AP_REGNUM --#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1) -+#define FIRST_PSEUDO_REGISTER 
(LAST_FAKE_REGNUM + 1) - - /* The number of argument registers available for each class. */ - #define NUM_ARG_REGS 8 -@@ -657,6 +665,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - - #define FP_SIMD_SAVED_REGNUM_P(REGNO) \ - (((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM)) -+ -+#define FAKE_REGNUM_P(REGNO) \ -+ IN_RANGE (REGNO, FIRST_FAKE_REGNUM, LAST_FAKE_REGNUM) - - /* Register and constant classes. */ - -@@ -677,6 +688,7 @@ enum reg_class - PR_REGS, - FFR_REGS, - PR_AND_FFR_REGS, -+ FAKE_REGS, - ALL_REGS, - LIM_REG_CLASSES /* Last */ - }; -@@ -700,6 +712,7 @@ enum reg_class - "PR_REGS", \ - "FFR_REGS", \ - "PR_AND_FFR_REGS", \ -+ "FAKE_REGS", \ - "ALL_REGS" \ - } - -@@ -720,6 +733,7 @@ enum reg_class - { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ - { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ - { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ -+ { 0x00000000, 0x00000000, 0x1fc00000 }, /* FAKE_REGS */ \ - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ - } - -@@ -920,6 +934,15 @@ typedef struct GTY (()) machine_function - bool reg_is_wrapped_separately[LAST_SAVED_REGNUM]; - /* One entry for each general purpose register. */ - rtx call_via[SP_REGNUM]; -+ -+ /* A pseudo register that points to the function's TPIDR2 block, or null -+ if the function doesn't have a TPIDR2 block. */ -+ rtx tpidr2_block; -+ -+ /* A pseudo register that points to the function's ZA save buffer, -+ or null if none. */ -+ rtx za_save_buffer; -+ - bool label_is_assembled; - - /* True if we've expanded at least one call to a function that changes -@@ -927,6 +950,10 @@ typedef struct GTY (()) machine_function - guarantees that no such mode switch exists. */ - bool call_switches_pstate_sm; - -+ /* Used to generated unique identifiers for each update to ZA by an -+ asm statement. 
*/ -+ unsigned int next_asm_update_za_id; -+ - /* A set of all decls that have been passed to a vld1 intrinsic in the - current function. This is used to help guide the vector cost model. */ - hash_set *vector_load_decls; -@@ -996,6 +1023,10 @@ typedef struct - bool silent_p; /* True if we should act silently, rather than - raise an error for invalid calls. */ - -+ /* AARCH64_STATE_* flags that describe whether the function shares ZA -+ with its callers. */ -+ unsigned int shared_za_flags; -+ - /* A list of registers that need to be saved and restored around a - change to PSTATE.SM. An auto_vec would be more convenient, but those - can't be copied. */ -@@ -1344,4 +1375,61 @@ extern poly_uint16 aarch64_sve_vg; - STACK_BOUNDARY / BITS_PER_UNIT) \ - : (crtl->outgoing_args_size + STACK_POINTER_OFFSET)) - -+#ifndef USED_FOR_TARGET -+ -+/* Enumerates the mode-switching "entities" for AArch64. */ -+enum class aarch64_mode_entity : int -+{ -+ /* An aarch64_tristate_mode that says whether we have created a local -+ save buffer for the current function's ZA state. The only transition -+ is from NO to YES. */ -+ HAVE_ZA_SAVE_BUFFER, -+ -+ /* An aarch64_local_sme_state that reflects the state of all data -+ controlled by PSTATE.ZA. */ -+ LOCAL_SME_STATE -+}; -+ -+/* Describes the state of all data controlled by PSTATE.ZA */ -+enum class aarch64_local_sme_state : int -+{ -+ /* ZA is in the off or dormant state. If it is dormant, the contents -+ of ZA belong to a caller. */ -+ INACTIVE_CALLER, -+ -+ /* ZA is in the off state: PSTATE.ZA is 0 and TPIDR2_EL0 is null. */ -+ OFF, -+ -+ /* ZA is in the off or dormant state. If it is dormant, the contents -+ of ZA belong to the current function. */ -+ INACTIVE_LOCAL, -+ -+ /* ZA is in the off state and the current function's ZA contents are -+ stored in the lazy save buffer. This is the state on entry to -+ exception handlers. */ -+ SAVED_LOCAL, -+ -+ /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null. 
-+ The contents of ZA are live. */ -+ ACTIVE_LIVE, -+ -+ /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null. -+ The contents of ZA are dead. */ -+ ACTIVE_DEAD, -+ -+ /* ZA could be in multiple states. */ -+ ANY -+}; -+ -+enum class aarch64_tristate_mode : int { NO, YES, MAYBE }; -+ -+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \ -+ aarch64_optimize_mode_switching (aarch64_mode_entity (ENTITY)) -+ -+#define NUM_MODES_FOR_MODE_SWITCHING \ -+ { int (aarch64_tristate_mode::MAYBE), \ -+ int (aarch64_local_sme_state::ANY) } -+ -+#endif -+ - #endif /* GCC_AARCH64_H */ -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index bb867de74..05a7c6675 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -111,6 +111,56 @@ - ;; "FFR token": a fake register used for representing the scheduling - ;; restrictions on FFR-related operations. - (FFRT_REGNUM 85) -+ -+ ;; ---------------------------------------------------------------- -+ ;; Fake registers -+ ;; ---------------------------------------------------------------- -+ ;; These registers represent abstract things, rather than real -+ ;; architected registers. -+ -+ ;; Sometimes we use placeholder instructions to mark where later -+ ;; ABI-related lowering is needed. These placeholders read and -+ ;; write this register. Instructions that depend on the lowering -+ ;; read the register. -+ (LOWERING_REGNUM 86) -+ -+ ;; Represents the contents of the current function's TPIDR2 block, -+ ;; in abstract form. -+ (TPIDR2_BLOCK_REGNUM 87) -+ -+ ;; Holds the value that the current function wants PSTATE.ZA to be. -+ ;; The actual value can sometimes vary, because it does not track -+ ;; changes to PSTATE.ZA that happen during a lazy save and restore. -+ ;; Those effects are instead tracked by ZA_SAVED_REGNUM. -+ (SME_STATE_REGNUM 88) -+ -+ ;; Instructions write to this register if they set TPIDR2_EL0 to a -+ ;; well-defined value. 
Instructions read from the register if they -+ ;; depend on the result of such writes. -+ ;; -+ ;; The register does not model the architected TPIDR2_ELO, just the -+ ;; current function's management of it. -+ (TPIDR2_SETUP_REGNUM 89) -+ -+ ;; Represents the property "has an incoming lazy save been committed?". -+ (ZA_FREE_REGNUM 90) -+ -+ ;; Represents the property "are the current function's ZA contents -+ ;; stored in the lazy save buffer, rather than in ZA itself?". -+ (ZA_SAVED_REGNUM 91) -+ -+ ;; Represents the contents of the current function's ZA state in -+ ;; abstract form. At various times in the function, these contents -+ ;; might be stored in ZA itself, or in the function's lazy save buffer. -+ ;; -+ ;; The contents persist even when the architected ZA is off. Private-ZA -+ ;; functions have no effect on its contents. -+ (ZA_REGNUM 92) -+ ;; ---------------------------------------------------------------- -+ (FIRST_FAKE_REGNUM LOWERING_REGNUM) -+ (LAST_FAKE_REGNUM ZA_REGNUM) -+ ;; ---------------------------------------------------------------- -+ - ;; The pair of scratch registers used for stack probing with -fstack-check. - ;; Leave R9 alone as a possible choice for the static chain. - ;; Note that the use of these registers is mutually exclusive with the use -@@ -303,7 +353,12 @@ - UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. - UNSPEC_LD1RO - UNSPEC_SALT_ADDR -+ UNSPEC_SAVE_NZCV -+ UNSPEC_RESTORE_NZCV - UNSPECV_PATCHABLE_AREA -+ ;; Wraps a constant integer that should be multiplied by the number -+ ;; of quadwords in an SME vector. -+ UNSPEC_SME_VQ - ]) - - (define_c_enum "unspecv" [ -@@ -379,7 +434,7 @@ - ;; Q registers and is equivalent to "simd". 
- - (define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd -- simd nosimd sve fp16]) -+ simd nosimd sve fp16 sme]) - - (define_enum_attr "arch" "arches" (const_string "any")) - -@@ -423,7 +478,10 @@ - (match_test "TARGET_FP_F16INST")) - - (and (eq_attr "arch" "sve") -- (match_test "TARGET_SVE"))) -+ (match_test "TARGET_SVE")) -+ -+ (and (eq_attr "arch" "sme") -+ (match_test "TARGET_SME"))) - (const_string "yes") - (const_string "no"))) - -@@ -928,7 +986,7 @@ - (set_attr "sls_length" "retbr")] - ) - --(define_insn "*cb1" -+(define_insn "aarch64_cb1" - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") - (const_int 0)) - (label_ref (match_operand 1 "" "")) -@@ -1291,6 +1349,7 @@ - /* The "mov_imm" type for CNT is just a placeholder. */ - [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); - [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); -+ [r , UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]); - [r , m ; load_4 , * , 4] ldr\t%w0, %1 - [w , m ; load_4 , fp , 4] ldr\t%s0, %1 - [m , r Z; store_4 , * , 4] str\t%w1, %0 -@@ -1326,6 +1385,7 @@ - /* The "mov_imm" type for CNT is just a placeholder. 
*/ - [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]); - [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]); -+ [r, UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]); - [r, m ; load_8 , * , 4] ldr\t%x0, %1 - [w, m ; load_8 , fp , 4] ldr\t%d0, %1 - [m, r Z; store_8 , * , 4] str\t%x1, %0 -@@ -7733,6 +7793,21 @@ - [(set (attr "length") (symbol_ref "INTVAL (operands[0])"))] - ) - -+(define_insn "aarch64_save_nzcv" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec:DI [(reg:CC CC_REGNUM)] UNSPEC_SAVE_NZCV))] -+ "" -+ "mrs\t%0, nzcv" -+) -+ -+(define_insn "aarch64_restore_nzcv" -+ [(set (reg:CC CC_REGNUM) -+ (unspec:CC [(match_operand:DI 0 "register_operand" "r")] -+ UNSPEC_RESTORE_NZCV))] -+ "" -+ "msr\tnzcv, %0" -+) -+ - ;; AdvSIMD Stuff - (include "aarch64-simd.md") - -diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md -index 212a73416..88fb9a07c 100644 ---- a/gcc/config/aarch64/constraints.md -+++ b/gcc/config/aarch64/constraints.md -@@ -220,6 +220,12 @@ - (and (match_code "const_poly_int") - (match_test "aarch64_sve_rdvl_immediate_p (op)"))) - -+(define_constraint "UsR" -+ "@internal -+ A constraint that matches a value produced by RDSVL." 
-+ (and (match_code "const") -+ (match_test "aarch64_rdsvl_immediate_p (op)"))) -+ - (define_constraint "Usv" - "@internal - A constraint that matches a VG-based constant that can be loaded by -diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C -new file mode 100644 -index 000000000..a245546d8 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C -@@ -0,0 +1,189 @@ -+// { dg-options "-O -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void callee_inout() __arm_inout("za"); -+void callee_in() noexcept __arm_in("za"); -+void callee_out() noexcept __arm_out("za"); -+void callee_normal(); -+ -+/* -+** _Z5test1v: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** bl __cxa_begin_catch -+** bl __cxa_end_catch -+** mov w0, #?2 -+** ... -+*/ -+__arm_new("za") int -+test1 () -+{ -+ try -+ { -+ callee_inout(); -+ return 1; -+ } -+ catch (...) -+ { -+ return 2; -+ } -+} -+ -+/* -+** _Z5test2v: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** bl __cxa_begin_catch -+** smstart za -+** bl _Z10callee_outv -+** bl _Z9callee_inv -+** smstop za -+** bl __cxa_end_catch -+** mov w0, #?2 -+** ... -+*/ -+__arm_new("za") int -+test2 () -+{ -+ try -+ { -+ callee_inout(); -+ return 1; -+ } -+ catch (...) -+ { -+ callee_out(); -+ callee_in(); -+ return 2; -+ } -+} -+ -+/* -+** _Z5test3v: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** smstop za -+** ... -+** bl _Z13callee_normalv -+** ... -+** bl __cxa_begin_catch -+** smstart za -+** bl _Z10callee_outv -+** bl _Z9callee_inv -+** smstop za -+** bl __cxa_end_catch -+** mov w0, #?2 -+** ... -+*/ -+__arm_new("za") int -+test3 () -+{ -+ try -+ { -+ callee_normal(); -+ return 1; -+ } -+ catch (...) -+ { -+ callee_out(); -+ callee_in(); -+ return 2; -+ } -+} -+ -+__arm_new("za") int -+test4 () -+{ -+ try -+ { -+ // No lazy save set up because this is a shared-ZA function. -+ callee_inout(); -+ return 1; -+ } -+ catch (...) 
-+ { -+ callee_inout(); -+ return 2; -+ } -+} -+// { dg-final { scan-assembler {_Z5test4v:(?:(?!msr\ttpidr2_el0, x[0-9]+).)*\tret} } } -+ -+/* -+** _Z5test5v: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** smstart za -+** ... -+** bl _Z12callee_inoutv -+** add (x[0-9]+), [^\n]+ -+** msr tpidr2_el0, \1 -+** bl _Z13callee_normalv -+** msr tpidr2_el0, xzr -+** smstop za -+** ... -+** bl __cxa_begin_catch -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** msr tpidr2_el0, xzr -+** bl _Z12callee_inoutv -+** smstop za -+** bl __cxa_end_catch -+** mov w0, #?2 -+** ... -+*/ -+__arm_new("za") int -+test5 () -+{ -+ try -+ { -+ callee_inout(); -+ callee_normal(); -+ return 1; -+ } -+ catch (...) -+ { -+ callee_inout(); -+ return 2; -+ } -+} -+ -+/* -+** _Z5test6v: -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** bl _Z13callee_normalv -+** msr tpidr2_el0, xzr -+** ... -+** bl __cxa_begin_catch -+** bl __cxa_end_catch -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** msr tpidr2_el0, xzr -+** ... -+*/ -+int -+test6 () __arm_inout("za") -+{ -+ try -+ { -+ callee_normal(); -+ callee_out(); -+ return 1; -+ } -+ catch (...) 
-+ { -+ return 2; -+ } -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -index 032485adf..8b0755014 100644 ---- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -+++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -@@ -2,3 +2,8 @@ - - void f1 () __arm_streaming; - void f2 () __arm_streaming_compatible; -+void f3 () __arm_in("za"); -+void f4 () __arm_out("za"); -+void f5 () __arm_inout("za"); -+void f6 () __arm_preserves("za"); -+__arm_new("za") void f7 () {} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -index 8f1b83676..fcabe3edc 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -@@ -2,3 +2,8 @@ - - void f1 () __arm_streaming; - void f2 () __arm_streaming_compatible; -+void f3 () __arm_in("za"); -+void f4 () __arm_out("za"); -+void f5 () __arm_inout("za"); -+void f6 () __arm_preserves("za"); -+__arm_new("za") void f7 () {} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c -new file mode 100644 -index 000000000..856880e21 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c -@@ -0,0 +1,154 @@ -+// { dg-options "" } -+ -+void shared_a () [[arm::inout("za")]]; -+void shared_a (); // { dg-error "conflicting types" } -+ -+void shared_b (); -+void shared_b () [[arm::inout("za")]]; // { dg-error "conflicting types" } -+ -+void shared_c () [[arm::inout("za")]]; -+void shared_c () {} // Inherits attribute from declaration (confusingly). 
-+ -+void shared_d (); -+void shared_d () [[arm::inout("za")]] {} // { dg-error "conflicting types" } -+ -+void shared_e () [[arm::inout("za")]] {} -+void shared_e (); // { dg-error "conflicting types" } -+ -+void shared_f () {} -+void shared_f () [[arm::inout("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*shared_g) (); -+extern void (*shared_g) () [[arm::inout("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*shared_h) () [[arm::inout("za")]]; -+extern void (*shared_h) (); // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void preserved_a () [[arm::preserves("za")]]; -+void preserved_a (); // { dg-error "conflicting types" } -+ -+void preserved_b (); -+void preserved_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+void preserved_c () [[arm::preserves("za")]]; -+void preserved_c () {} // Inherits attribute from declaration (confusingly). -+ -+void preserved_d (); -+void preserved_d () [[arm::preserves("za")]] {} // { dg-error "conflicting types" } -+ -+void preserved_e () [[arm::preserves("za")]] {} -+void preserved_e (); // { dg-error "conflicting types" } -+ -+void preserved_f () {} -+void preserved_f () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*preserved_g) (); -+extern void (*preserved_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*preserved_h) () [[arm::preserves("za")]]; -+extern void (*preserved_h) (); // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void replicated_1 () [[arm::in("za", "za"), arm::in("za")]]; -+void replicated_2 () [[arm::out("za", "za"), arm::out("za")]]; -+void replicated_3 () [[arm::inout("za", "za"), arm::inout("za")]]; -+void replicated_4 () [[arm::preserves("za", "za"), arm::preserves("za")]]; -+ 
-+//---------------------------------------------------------------------------- -+ -+void invalid_1 () [[arm::in]]; // { dg-error "wrong number of arguments" } -+void invalid_2 () [[arm::in()]]; // { dg-error "parentheses must be omitted" } -+ // { dg-error "wrong number of arguments" "" { target *-*-* } .-1 } -+void invalid_3 () [[arm::in("")]]; // { dg-error "unrecognized state string ''" } -+void invalid_4 () [[arm::in("foo")]]; // { dg-error "unrecognized state string 'foo'" } -+void invalid_5 () [[arm::in(42)]]; // { dg-error "the arguments to 'in' must be constant strings" } -+void invalid_6 () [[arm::in(*(int *)0 ? "za" : "za")]]; // { dg-error "the arguments to 'in' must be constant strings" } -+ -+//---------------------------------------------------------------------------- -+ -+void mixed_a () [[arm::preserves("za")]]; -+void mixed_a () [[arm::inout("za")]]; // { dg-error "conflicting types" } -+ -+void mixed_b () [[arm::inout("za")]]; -+void mixed_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+void mixed_c () [[arm::preserves("za")]]; -+void mixed_c () [[arm::in("za")]] {} // { dg-error "conflicting types" } -+ -+void mixed_d () [[arm::inout("za")]]; -+void mixed_d () [[arm::in("za")]] {} // { dg-error "conflicting types" } -+ -+void mixed_e () [[arm::out("za")]] {} -+void mixed_e () [[arm::in("za")]]; // { dg-error "conflicting types" } -+ -+void mixed_f () [[arm::inout("za")]] {} -+void mixed_f () [[arm::out("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*mixed_g) () [[arm::in("za")]]; -+extern void (*mixed_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+extern void (*mixed_h) () [[arm::preserves("za")]]; -+extern void (*mixed_h) () [[arm::out("za")]]; // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+void contradiction_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for 
state 'za'" } -+void contradiction_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+ -+int [[arm::inout("za")]] int_attr; // { dg-warning "only applies to function types" } -+void *[[arm::preserves("za")]] ptr_attr; // { dg-warning "only applies to function types" } -+ -+typedef void preserved_callback () [[arm::preserves("za")]]; -+typedef void shared_callback () [[arm::inout("za")]]; -+ -+void (*preserved_callback_ptr) () [[arm::preserves("za")]]; -+void (*shared_callback_ptr) () [[arm::inout("za")]]; -+ -+typedef void contradiction_callback_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+typedef void contradiction_callback_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+ -+void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+ -+struct s { -+ void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+ void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" } -+}; -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_ok_1 () __arm_inout("za"); -+void keyword_ok_1 () __arm_inout("za"); -+ -+void keyword_ok_2 () __arm_in("za"); -+void keyword_ok_2 () [[arm::in("za")]]; -+ -+void keyword_ok_3 () [[arm::out("za")]]; -+void keyword_ok_3 () __arm_out("za"); -+ -+void keyword_ok_4 () __arm_inout("za") [[arm::inout("za")]]; -+ -+void keyword_ok_5 () __arm_preserves("za"); -+void keyword_ok_5 () [[arm::preserves("za")]]; -+ -+__arm_new("za") void 
keyword_ok_6 () {} -+ -+//---------------------------------------------------------------------------- -+ -+void keyword_conflict_1 () __arm_inout("za"); -+void keyword_conflict_1 (); // { dg-error "conflicting types" } -+ -+void keyword_conflict_2 (); -+void keyword_conflict_2 () __arm_inout("za"); // { dg-error "conflicting types" } -+ -+void keyword_conflict_3 () __arm_inout("za"); -+void keyword_conflict_3 () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+void keyword_conflict_4 () [[arm::preserves("za")]]; -+void keyword_conflict_4 () __arm_inout("za"); // { dg-error "conflicting types" } -+ -+__arm_new("za") void keyword_conflict_5 () __arm_inout("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+__arm_new("za") void keyword_conflict_6 () __arm_preserves("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c -new file mode 100644 -index 000000000..572ff309f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c -@@ -0,0 +1,73 @@ -+// { dg-options "" } -+ -+[[arm::new("za")]] void new_za_a (); -+void new_za_a (); -+ -+void new_za_b (); -+[[arm::new("za")]] void new_za_b (); -+ -+[[arm::new("za")]] void new_za_c (); -+void new_za_c () {} -+ -+void new_za_d (); -+[[arm::new("za")]] void new_za_d () {} -+ -+[[arm::new("za")]] void new_za_e () {} -+void new_za_e (); -+ -+void new_za_f () {} -+[[arm::new("za")]] void new_za_f (); // { dg-error "cannot apply attribute 'new' to 'new_za_f' after the function has been defined" } -+ -+//---------------------------------------------------------------------------- -+ -+[[arm::new("za")]] void shared_a (); -+void shared_a () [[arm::inout("za")]]; // { dg-error "conflicting types" } -+ -+void shared_b () [[arm::inout("za")]]; -+[[arm::new("za")]] void shared_b (); // { dg-error "conflicting 
types" } -+ -+[[arm::new("za")]] void shared_c (); -+void shared_c () [[arm::in("za")]] {} // { dg-error "conflicting types" } -+ -+void shared_d () [[arm::in("za")]]; -+[[arm::new("za")]] void shared_d () {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+ -+[[arm::new("za")]] void shared_e () {} -+void shared_e () [[arm::out("za")]]; // { dg-error "conflicting types" } -+ -+void shared_f () [[arm::out("za")]] {} -+[[arm::new("za")]] void shared_f (); // { dg-error "conflicting types" } -+ -+[[arm::new("za")]] void shared_g () {} -+void shared_g () [[arm::preserves("za")]]; // { dg-error "conflicting types" } -+ -+void shared_h () [[arm::preserves("za")]] {} -+[[arm::new("za")]] void shared_h (); // { dg-error "conflicting types" } -+ -+//---------------------------------------------------------------------------- -+ -+[[arm::new("za")]] void contradiction_1 () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+void contradiction_2 [[arm::new("za")]] () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+[[arm::new("za")]] void contradiction_3 () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+void contradiction_4 [[arm::new("za")]] () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" } -+ -+int [[arm::new("za")]] int_attr; // { dg-warning "does not apply to types" } -+[[arm::new("za")]] int int_var_attr; // { dg-error "applies only to function definitions" } -+typedef void new_za_callback () [[arm::new("za")]]; // { dg-warning "does not apply to types" } -+[[arm::new("za")]] void (*new_za_var_callback) (); // { dg-error "applies only to function definitions" } -+ -+//---------------------------------------------------------------------------- -+ -+[[arm::new("za")]] void complementary_1 () 
[[arm::streaming]] {} -+void complementary_2 [[arm::new("za")]] () [[arm::streaming]] {} -+[[arm::new("za")]] void complementary_3 () [[arm::streaming_compatible]] {} -+void complementary_4 [[arm::new("za")]] () [[arm::streaming_compatible]] {} -+ -+//---------------------------------------------------------------------------- -+ -+#pragma GCC target "+nosme" -+ -+[[arm::new("za")]] void bereft_1 (); -+[[arm::new("za")]] void bereft_2 () {} // { dg-error "functions with SME state require the ISA extension 'sme'" } -+void bereft_3 () [[arm::inout("za")]]; -+void bereft_4 () [[arm::inout("za")]] {} // { dg-error "functions with SME state require the ISA extension 'sme'" } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c -new file mode 100644 -index 000000000..203f6ae8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c -@@ -0,0 +1,31 @@ -+// { dg-options "" } -+ -+void normal_callee (); -+void in_callee () [[arm::in("za")]]; -+void out_callee () [[arm::out("za")]]; -+void inout_callee () [[arm::inout("za")]]; -+void preserves_callee () [[arm::preserves("za")]]; -+ -+struct callbacks { -+ void (*normal_ptr) (); -+ void (*in_ptr) () [[arm::in("za")]]; -+ void (*out_ptr) () [[arm::out("za")]]; -+ void (*inout_ptr) () [[arm::inout("za")]]; -+ void (*preserves_ptr) () [[arm::preserves("za")]]; -+}; -+ -+void -+normal_caller (struct callbacks *c) -+{ -+ normal_callee (); -+ in_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ out_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ inout_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ preserves_callee (); // { dg-error {call to a function that shares SME state from a function that has no SME state} } -+ -+ c->normal_ptr (); -+ c->in_ptr (); // { 
dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ c->out_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ c->inout_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} } -+ c->preserves_ptr (); // { dg-error {call to a function that shares SME state from a function that has no SME state} } -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c -new file mode 100644 -index 000000000..cec0abf0e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c -@@ -0,0 +1,585 @@ -+// { dg-options "-O -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void private_za(); -+void out_za() __arm_out("za"); -+void in_za() __arm_in("za"); -+void inout_za() __arm_inout("za"); -+void preserves_za() __arm_preserves("za"); -+ -+/* -+** test1: -+** ret -+*/ -+__arm_new("za") void test1() -+{ -+} -+ -+/* -+** test2: -+** ldr w0, \[x0\] -+** ret -+*/ -+__arm_new("za") int test2(int *ptr) -+{ -+ return *ptr; -+} -+ -+/* -+** test3: -+** stp [^\n]+ -+** mov x29, sp -+** bl private_za -+** ( -+** mov w0, 0 -+** ldp [^\n]+ -+** | -+** ldp [^\n]+ -+** mov w0, 0 -+** ) -+** ret -+*/ -+__arm_new("za") int test3() -+{ -+ private_za(); -+ return 0; -+} -+ -+/* -+** test4: -+** ... -+** mrs x0, tpidr2_el0 -+** cbz x0, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** zero { za } -+** smstart za -+** bl in_za -+** smstop za -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test4() -+{ -+ in_za(); // Uses zeroed contents. -+} -+ -+/* -+** test5: -+** ... 
-+** mrs x0, tpidr2_el0 -+** cbz x0, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** smstop za -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test5() -+{ -+ private_za(); -+ out_za(); -+ in_za(); -+ private_za(); -+} -+ -+// Despite the long test, there shouldn't be too much scope for variation -+// here. The point is both to test correctness and code quality. -+/* -+** test6: -+** stp [^\n]+ -+** mov x29, sp -+** mrs x0, tpidr2_el0 -+** cbz x0, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** smstart za -+** bl out_za -+** rdsvl (x[0-9]+), #1 -+** mul (x[0-9]+), \1, \1 -+** sub sp, sp, \2 -+** mov (x[0-9]+), sp -+** stp \3, \1, \[x29, #?16\] -+** add (x[0-9]+), x29, #?16 -+** msr tpidr2_el0, \4 -+** bl private_za -+** ( -+** add (x[0-9]+), x29, #?16 -+** mrs (x[0-9]+), tpidr2_el0 -+** cbnz \6, [^\n]+ -+** smstart za -+** mov x0, \5 -+** | -+** add x0, x29, #?16 -+** mrs (x[0-9]+), tpidr2_el0 -+** cbnz \6, [^\n]+ -+** smstart za -+** ) -+** bl __arm_tpidr2_restore -+** msr tpidr2_el0, xzr -+** bl in_za -+** smstop za -+** mov sp, x29 -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test6() -+{ -+ out_za(); -+ private_za(); -+ in_za(); -+} -+ -+// Rely on previous tests for the part leading up to the smstart. -+/* -+** test7: -+** ... -+** smstart za -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** smstop za -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test7() -+{ -+ out_za(); -+ in_za(); -+ private_za(); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test8: -+** ... 
-+** smstart za -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test8() -+{ -+ out_za(); -+ in_za(); -+ private_za(); -+ out_za(); -+ in_za(); -+ private_za(); -+} -+ -+/* -+** test9: -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** bl private_za -+** bl private_za -+** bl private_za -+** bl private_za -+** add x[0-9]+, x29, #?16 -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+*/ -+__arm_new("za") void test9() -+{ -+ out_za(); -+ private_za(); -+ private_za(); -+ private_za(); -+ private_za(); -+ in_za(); -+} -+ -+/* -+** test10: -+** ldr (w[0-9]+), \[x0\] -+** cbz \1, [^\n]+ -+** ldr [^\n]+ -+** add [^\n]+ -+** str [^\n]+ -+** ret -+** ... -+*/ -+__arm_new("za") void test10(volatile int *ptr) -+{ -+ if (__builtin_expect (*ptr != 0, 1)) -+ *ptr = *ptr + 1; -+ else -+ inout_za(); -+} -+ -+/* -+** test11: -+** ... -+** ldr w[0-9]+, [^\n]+ -+** add (w[0-9]+), [^\n]+ -+** str \1, [^\n]+ -+** ... -+** ret -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** bl inout_za -+** ldr (w[0-9]+), [^\n]+ -+** cbnz \2, [^\n]+ -+** smstop za -+** ... -+*/ -+__arm_new("za") void test11(volatile int *ptr) -+{ -+ if (__builtin_expect (*ptr == 0, 0)) -+ do -+ inout_za(); -+ while (*ptr); -+ else -+ *ptr += 1; -+} -+ -+__arm_new("za") void test12(volatile int *ptr) -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test13: -+** stp [^\n]+ -+** ... -+** stp [^\n]+ -+** ... -+** bl __arm_tpidr2_save -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** cbnz [^\n]+ -+** smstart za -+** msr tpidr2_el0, xzr -+** bl out_za -+** bl in_za -+** ... -+** smstop za -+** ... 
-+*/ -+__arm_new("za") void test13(volatile int *ptr) -+{ -+ do -+ { -+ private_za(); -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test14: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** smstart za -+** bl inout_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** bl out_za -+** bl in_za -+** smstop za -+** ... -+*/ -+__arm_new("za") void test14(volatile int *ptr) -+{ -+ do -+ inout_za(); -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test15: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** smstart za -+** bl out_za -+** bl in_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** smstop za -+** bl private_za -+** ldr [^\n]+ -+** ldp [^\n]+ -+** ret -+*/ -+__arm_new("za") void test15(volatile int *ptr) -+{ -+ do -+ { -+ out_za(); -+ in_za(); -+ } -+ while (*ptr); -+ private_za(); -+} -+ -+/* -+** test16: -+** ... -+** bl __arm_tpidr2_save -+** ... -+** smstart za -+** b [^\n]+ -+-- loop: -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** msr tpidr2_el0, xzr -+-- loop_entry: -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** bl private_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** msr tpidr2_el0, xzr -+** smstop za -+** bl private_za -+** ... -+*/ -+__arm_new("za") void test16(volatile int *ptr) -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ private_za(); -+} -+ -+/* -+** test17: -+** ... -+** bl private_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** ... -+** msr tpidr2_el0, xzr -+** ... -+** smstop za -+** ... -+*/ -+__arm_new("za") void test17(volatile int *ptr) -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+} -+ -+/* -+** test18: -+** ldr w[0-9]+, [^\n]+ -+** cbnz w[0-9]+, [^\n]+ -+** ret -+** ... -+** smstop za -+** bl private_za -+** ... -+*/ -+__arm_new("za") void test18(volatile int *ptr) -+{ -+ if (__builtin_expect (*ptr, 0)) -+ { -+ out_za(); -+ in_za(); -+ private_za(); -+ } -+} -+ -+/* -+** test19: -+** ... 
-+** ldr w[0-9]+, [^\n]+ -+** cbz w[0-9]+, [^\n]+ -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstop za -+** bl private_za -+** ... -+*/ -+__arm_new("za") void test19(volatile int *ptr) -+{ -+ if (__builtin_expect (*ptr != 0, 1)) -+ private_za(); -+ else -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+} -+ -+/* -+** test20: -+** ... -+** bl a20 -+** (?:(?!x0).)* -+** bl b20 -+** ... -+** mov ([wx][0-9]+), [wx]0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** mov [wx]0, \1 -+** ... -+** bl c20 -+** ... -+*/ -+__arm_new("za") void test20() -+{ -+ extern int a20() __arm_inout("za"); -+ extern int b20(int); -+ extern void c20(int) __arm_inout("za"); -+ c20(b20(a20())); -+} -+ -+/* -+** test21: -+** ... -+** bl a21 -+** (?:(?!x0).)* -+** bl b21 -+** ... -+** mov (x[0-9]+), x0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** mov x0, \1 -+** ... -+** bl c21 -+** ... -+*/ -+__arm_new("za") void test21() -+{ -+ extern __UINT64_TYPE__ a21() __arm_inout("za"); -+ extern __UINT64_TYPE__ b21(__UINT64_TYPE__); -+ extern void c21(__UINT64_TYPE__) __arm_inout("za"); -+ c21(b21(a21())); -+} -+ -+/* -+** test22: -+** (?:(?!rdsvl).)* -+** rdsvl x[0-9]+, #1 -+** (?:(?!rdsvl).)* -+*/ -+__arm_new("za") void test22(volatile int *ptr) -+{ -+ inout_za(); -+ if (*ptr) -+ *ptr += 1; -+ else -+ private_za(); -+ private_za(); -+ in_za(); -+} -+ -+/* -+** test23: -+** (?:(?!__arm_tpidr2_save).)* -+** bl __arm_tpidr2_save -+** (?:(?!__arm_tpidr2_save).)* -+*/ -+__arm_new("za") void test23(volatile int *ptr) -+{ -+ if (*ptr) -+ *ptr += 1; -+ else -+ inout_za(); -+ inout_za(); -+} -+ -+/* -+** test24: -+** ... -+** bl in_za -+** ... -+** incb x1 -+** ... -+** bl out_za -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** incb x1 -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** incb x1 -+** ... -+** smstop za -+** ... 
-+** bl private_za -+** ... -+** ret -+*/ -+__arm_new("za") void test24() -+{ -+ in_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ out_za(); -+ inout_za(); -+ private_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ private_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ in_za(); -+ private_za(); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c -new file mode 100644 -index 000000000..d54840d3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c -@@ -0,0 +1,595 @@ -+// { dg-options "-O2 -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void private_za(); -+void out_za() __arm_out("za"); -+void in_za() __arm_in("za"); -+void inout_za() __arm_inout("za"); -+void preserves_za() __arm_preserves("za"); -+ -+/* -+** test1: -+** ret -+*/ -+void test1() __arm_inout("za") -+{ -+} -+ -+/* -+** test2: -+** ldr w0, \[x0\] -+** ret -+*/ -+int test2(int *ptr) __arm_inout("za") -+{ -+ return *ptr; -+} -+ -+/* -+** test3: -+** ... -+** sub sp, sp, x[0-9]+ -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+int test3() __arm_inout("za") -+{ -+ private_za(); -+ return 0; -+} -+ -+/* -+** test4: -+** stp [^\n]+ -+** [^\n]+ -+** bl in_za -+** ldp [^\n]+ -+** ret -+*/ -+void test4() __arm_inout("za") -+{ -+ in_za(); -+} -+ -+/* -+** test5: -+** ... -+** smstop za -+** ... -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** ... -+** sub sp, sp, x[0-9]+ -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+void test5() __arm_inout("za") -+{ -+ private_za(); -+ out_za(); -+ in_za(); -+ private_za(); -+} -+ -+/* -+** test6: -+** ... 
-+** bl out_za -+** ... -+** sub sp, sp, x[0-9]+ -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+** bl in_za -+** ... -+*/ -+void test6() __arm_inout("za") -+{ -+ out_za(); -+ private_za(); -+ in_za(); -+} -+ -+/* -+** test7: -+** stp [^\n]+ -+** [^\n]+ -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** ldp [^\n]+ -+** ret -+*/ -+void test7() __arm_inout("za") -+{ -+ out_za(); -+ in_za(); -+ private_za(); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test8: -+** stp [^\n]+ -+** [^\n]+ -+** bl out_za -+** bl in_za -+** smstop za -+** bl private_za -+** smstart za -+** bl out_za -+** bl in_za -+** ... -+** sub sp, sp, x[0-9]+ -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+** ret -+*/ -+void test8() __arm_inout("za") -+{ -+ out_za(); -+ in_za(); -+ private_za(); -+ out_za(); -+ in_za(); -+ private_za(); -+} -+ -+/* -+** test9: -+** stp [^\n]+ -+** [^\n]+ -+** bl out_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** bl private_za -+** bl private_za -+** bl private_za -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+void test9() __arm_inout("za") -+{ -+ out_za(); -+ private_za(); -+ private_za(); -+ private_za(); -+ private_za(); -+ in_za(); -+} -+ -+/* -+** test10: -+** ldr (w[0-9]+), \[x0\] -+** cbz \1, [^\n]+ -+** ldr [^\n]+ -+** add [^\n]+ -+** str [^\n]+ -+** ret -+** ... 
-+*/ -+void test10(volatile int *ptr) __arm_inout("za") -+{ -+ if (__builtin_expect (*ptr != 0, 1)) -+ *ptr = *ptr + 1; -+ else -+ inout_za(); -+} -+ -+/* -+** test11: -+** (?!.*(\t__arm|\tza|tpidr2_el0)).* -+*/ -+void test11(volatile int *ptr) __arm_inout("za") -+{ -+ if (__builtin_expect (*ptr == 0, 0)) -+ do -+ inout_za(); -+ while (*ptr); -+ else -+ *ptr += 1; -+} -+ -+void test12(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test13: -+** stp [^\n]+ -+** ... -+** stp [^\n]+ -+** ... -+-- loop: -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** smstart za -+** msr tpidr2_el0, xzr -+** bl out_za -+** bl in_za -+** [^\n]+ -+** [^\n]+ -+** ldp [^\n]+ -+** ret -+*/ -+void test13(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ { -+ private_za(); -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test14: -+** ... -+** bl inout_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** bl out_za -+** bl in_za -+** ... -+*/ -+void test14(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ inout_za(); -+ while (*ptr); -+ out_za(); -+ in_za(); -+} -+ -+/* -+** test15: -+** ... -+** bl out_za -+** bl in_za -+** ldr [^\n]+ -+** cbnz [^\n]+ -+** ... -+** stp [^\n]+ -+** ... -+** msr tpidr2_el0, [^\n]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+void test15(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ { -+ out_za(); -+ in_za(); -+ } -+ while (*ptr); -+ private_za(); -+} -+ -+/* -+** test16: -+** stp [^\n]+ -+** ... -+** stp [^\n]+ -+** ... -+** b [^\n]+ -+-- loop: -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... 
-+** msr tpidr2_el0, xzr -+-- loop_entry: -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+void test16(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+ private_za(); -+} -+ -+/* -+** test17: -+** ... -+-- loop: -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** smstart za -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+** cbnz [^\n]+ -+** [^\n]+ -+** [^\n]+ -+** ldp [^\n]+ -+** ret -+*/ -+void test17(volatile int *ptr) __arm_inout("za") -+{ -+ do -+ { -+ inout_za(); -+ private_za(); -+ while (*ptr) -+ ptr += 1; -+ } -+ while (*ptr); -+} -+ -+/* -+** test18: -+** ldr w[0-9]+, [^\n]+ -+** cbnz w[0-9]+, [^\n]+ -+** ret -+** ... -+** bl out_za -+** bl in_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** msr tpidr2_el0, xzr -+** ... -+*/ -+void test18(volatile int *ptr) __arm_inout("za") -+{ -+ if (__builtin_expect (*ptr, 0)) -+ { -+ out_za(); -+ in_za(); -+ private_za(); -+ } -+} -+ -+void test19(volatile int *ptr) __arm_inout("za") -+{ -+ if (__builtin_expect (*ptr != 0, 1)) -+ private_za(); -+ else -+ do -+ { -+ inout_za(); -+ private_za(); -+ } -+ while (*ptr); -+} -+ -+/* -+** test20: -+** ... -+** bl a20 -+** (?:(?!x0).)* -+** bl b20 -+** ... -+** mov ([wx][0-9]+), [wx]0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** mov [wx]0, \1 -+** ... -+** bl c20 -+** ... -+*/ -+void test20() __arm_inout("za") -+{ -+ extern int a20() __arm_inout("za"); -+ extern int b20(int); -+ extern void c20(int) __arm_inout("za"); -+ c20(b20(a20())); -+} -+ -+/* -+** test21: -+** ... 
-+** bl a21 -+** (?:(?!x0).)* -+** bl b21 -+** ... -+** mov (x[0-9]+), x0 -+** ... -+** bl __arm_tpidr2_restore -+** ... -+** mov x0, \1 -+** ... -+** bl c21 -+** ... -+*/ -+void test21() __arm_inout("za") -+{ -+ extern __UINT64_TYPE__ a21() __arm_inout("za"); -+ extern __UINT64_TYPE__ b21(__UINT64_TYPE__); -+ extern void c21(__UINT64_TYPE__) __arm_inout("za"); -+ c21(b21(a21())); -+} -+ -+/* -+** test22: -+** (?:(?!rdsvl).)* -+** rdsvl x[0-9]+, #1 -+** (?:(?!rdsvl).)* -+*/ -+void test22(volatile int *ptr) __arm_inout("za") -+{ -+ inout_za(); -+ if (*ptr) -+ *ptr += 1; -+ else -+ private_za(); -+ private_za(); -+ in_za(); -+} -+ -+void test23(volatile int *ptr) __arm_inout("za") -+{ -+ if (*ptr) -+ *ptr += 1; -+ else -+ inout_za(); -+ inout_za(); -+} -+ -+/* -+** test24: -+** ... -+** bl in_za -+** ... -+** incb x1 -+** ... -+** bl out_za -+** bl inout_za -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** incb x1 -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... -+** incb x1 -+** ... -+** msr tpidr2_el0, x[0-9]+ -+** ... -+** bl private_za -+** ... -+** mrs x[0-9]+, tpidr2_el0 -+** ... 
-+** ret -+*/ -+void test24() __arm_inout("za") -+{ -+ in_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ out_za(); -+ inout_za(); -+ private_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ private_za(); -+ asm ("incb\tx1" ::: "x1", "za"); -+ in_za(); -+ private_za(); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c -new file mode 100644 -index 000000000..d5b226ae1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c -@@ -0,0 +1,23 @@ -+// { dg-options "-O -fno-optimize-sibling-calls -fomit-frame-pointer" } -+ -+void private_za(); -+void out_za() __arm_out("za"); -+void in_za() __arm_in("za"); -+ -+__arm_new("za") void test20(volatile int *ptr) -+{ -+ if (*ptr) -+ out_za(); -+ else -+ *ptr += 1; -+ *ptr += 1; -+ if (*ptr) -+ in_za(); -+ else -+ *ptr += 1; -+} -+ -+// { dg-final { scan-assembler {\tbl\t__arm_tpidr2_save\n} } } -+// { dg-final { scan-assembler {\tsmstart\tza\n} } } -+// { dg-final { scan-assembler {\tsmstop\tza\n} } } -+// { dg-final { scan-assembler-not {\tsub\tsp, sp, x[0-9]+\n} } } --- -2.33.0 - diff --git a/0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch b/0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch deleted file mode 100644 index 3d69ca5..0000000 --- a/0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch +++ /dev/null @@ -1,103 +0,0 @@ -From 9866b4c1d85d88fd9e25ff3ac5224b69d4e0f0b2 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:26 +0000 -Subject: [PATCH 108/157] [Backport][SME] aarch64: Add a register class for - w12-w15 - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=084122adb5792a9c8e7f7876e2c1d59ba80c228b - -Some SME instructions use w12-w15 to index ZA. This patch -adds a register class for that range. - -gcc/ - * config/aarch64/aarch64.h (W12_W15_REGNUM_P): New macro. - (W12_W15_REGS): New register class. - (REG_CLASS_NAMES, REG_CLASS_CONTENTS): Add entries for it. 
- * config/aarch64/aarch64.cc (aarch64_regno_regclass) - (aarch64_class_max_nregs, aarch64_register_move_cost): Handle - W12_W15_REGS. ---- - gcc/config/aarch64/aarch64.cc | 12 +++++++----- - gcc/config/aarch64/aarch64.h | 6 ++++++ - 2 files changed, 13 insertions(+), 5 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index a6e996c5b..112dfeabb 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -14213,6 +14213,9 @@ aarch64_label_mentioned_p (rtx x) - enum reg_class - aarch64_regno_regclass (unsigned regno) - { -+ if (W12_W15_REGNUM_P (regno)) -+ return W12_W15_REGS; -+ - if (STUB_REGNUM_P (regno)) - return STUB_REGS; - -@@ -14577,6 +14580,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) - unsigned int nregs, vec_flags; - switch (regclass) - { -+ case W12_W15_REGS: - case STUB_REGS: - case TAILCALL_ADDR_REGS: - case POINTER_REGS: -@@ -16926,13 +16930,11 @@ aarch64_register_move_cost (machine_mode mode, - const struct cpu_regmove_cost *regmove_cost - = aarch64_tune_params.regmove_cost; - -- /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ -- if (to == TAILCALL_ADDR_REGS || to == POINTER_REGS -- || to == STUB_REGS) -+ /* Trest any subset of POINTER_REGS as though it were GENERAL_REGS. */ -+ if (reg_class_subset_p (to, POINTER_REGS)) - to = GENERAL_REGS; - -- if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS -- || from == STUB_REGS) -+ if (reg_class_subset_p (from, POINTER_REGS)) - from = GENERAL_REGS; - - /* Make RDFFR very expensive. 
In particular, if we know that the FFR -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 89d30b9bf..8b21faf34 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -648,6 +648,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - && (REGNO) != R17_REGNUM \ - && (REGNO) != R30_REGNUM) \ - -+#define W12_W15_REGNUM_P(REGNO) \ -+ IN_RANGE (REGNO, R12_REGNUM, R15_REGNUM) -+ - #define FP_REGNUM_P(REGNO) \ - (((unsigned) (REGNO - V0_REGNUM)) <= (V31_REGNUM - V0_REGNUM)) - -@@ -674,6 +677,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - enum reg_class - { - NO_REGS, -+ W12_W15_REGS, - TAILCALL_ADDR_REGS, - STUB_REGS, - GENERAL_REGS, -@@ -698,6 +702,7 @@ enum reg_class - #define REG_CLASS_NAMES \ - { \ - "NO_REGS", \ -+ "W12_W15_REGS", \ - "TAILCALL_ADDR_REGS", \ - "STUB_REGS", \ - "GENERAL_REGS", \ -@@ -719,6 +724,7 @@ enum reg_class - #define REG_CLASS_CONTENTS \ - { \ - { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ -+ { 0x0000f000, 0x00000000, 0x00000000 }, /* W12_W15_REGS */ \ - { 0x00030000, 0x00000000, 0x00000000 }, /* TAILCALL_ADDR_REGS */\ - { 0x3ffcffff, 0x00000000, 0x00000000 }, /* STUB_REGS */ \ - { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ --- -2.33.0 - diff --git a/0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch b/0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch deleted file mode 100644 index eefe7d8..0000000 --- a/0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 8310c0df319a86bc2f63b8d3198dd1c394827bac Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:27 +0000 -Subject: [PATCH 109/157] [Backport][SME] aarch64: Add a VNx1TI mode - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80fc055cf00fee4b1f9f19f77c8880b12226e086 - -Although TI isn't really a native SVE element mode, it's convenient -for SME if we define VNx1TI anyway, so that it can be used to 
-distinguish .Q ZA operations from others. It's purely an RTL -convenience and isn't (yet) a valid storage mode. - -gcc/ - * config/aarch64/aarch64-modes.def: Add VNx1TI. ---- - gcc/config/aarch64/aarch64-modes.def | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def -index 8f399225a..8fa66fdb3 100644 ---- a/gcc/config/aarch64/aarch64-modes.def -+++ b/gcc/config/aarch64/aarch64-modes.def -@@ -146,7 +146,7 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2) - for 8-bit, 16-bit, 32-bit and 64-bit elements respectively. It isn't - strictly necessary to set the alignment here, since the default would - be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. */ --#define SVE_MODES(NVECS, VB, VH, VS, VD) \ -+#define SVE_MODES(NVECS, VB, VH, VS, VD, VT) \ - VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, NVECS == 1 ? 1 : 4); \ - VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, NVECS == 1 ? 1 : 4); \ - \ -@@ -154,6 +154,7 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2) - ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \ - ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \ -+ ADJUST_NUNITS (VT##TI, exact_div (aarch64_sve_vg * NVECS, 2)); \ - ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \ -@@ -163,17 +164,23 @@ ADV_SIMD_Q_REG_STRUCT_MODES (4, V4x16, V4x8, V4x4, V4x2) - ADJUST_ALIGNMENT (VH##HI, 16); \ - ADJUST_ALIGNMENT (VS##SI, 16); \ - ADJUST_ALIGNMENT (VD##DI, 16); \ -+ ADJUST_ALIGNMENT (VT##TI, 16); \ - ADJUST_ALIGNMENT (VH##BF, 16); \ - ADJUST_ALIGNMENT (VH##HF, 16); \ - ADJUST_ALIGNMENT (VS##SF, 16); \ - ADJUST_ALIGNMENT (VD##DF, 16); - --/* Give SVE vectors the names normally used for 256-bit vectors. -- The actual number depends on command-line flags. 
*/ --SVE_MODES (1, VNx16, VNx8, VNx4, VNx2) --SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) --SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) --SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) -+/* Give SVE vectors names of the form VNxX, where X describes what is -+ stored in each 128-bit unit. The actual size of the mode depends -+ on command-line flags. -+ -+ VNx1TI isn't really a native SVE mode, but it can be useful in some -+ limited situations. */ -+VECTOR_MODE_WITH_PREFIX (VNx, INT, TI, 1, 1); -+SVE_MODES (1, VNx16, VNx8, VNx4, VNx2, VNx1) -+SVE_MODES (2, VNx32, VNx16, VNx8, VNx4, VNx2) -+SVE_MODES (3, VNx48, VNx24, VNx12, VNx6, VNx3) -+SVE_MODES (4, VNx64, VNx32, VNx16, VNx8, VNx4) - - /* Partial SVE vectors: - --- -2.33.0 - diff --git a/0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch b/0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch deleted file mode 100644 index 1c2ac4e..0000000 --- a/0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch +++ /dev/null @@ -1,118 +0,0 @@ -From e3c0d3d98ab1f60900533f3f75c598f899f37c9f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:27 +0000 -Subject: [PATCH 110/157] [Backport][SME] aarch64: Generalise - unspec_based_function_base - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1ec23d5a29bc5d89cef60e2aba2fe4095ee12a8f - -Until now, SVE intrinsics that map directly to unspecs -have always used type suffix 0 to distinguish between signed -integers, unsigned integers, and floating-point values. -SME adds functions that need to use type suffix 1 instead. -This patch generalises the classes accordingly. - -gcc/ - * config/aarch64/aarch64-sve-builtins-functions.h - (unspec_based_function_base): Allow type suffix 1 to determine - the mode of the operation. - (unspec_based_function): Update accordingly. - (unspec_based_fused_function): Likewise. - (unspec_based_fused_lane_function): Likewise. 
---- - .../aarch64/aarch64-sve-builtins-functions.h | 29 ++++++++++++------- - 1 file changed, 18 insertions(+), 11 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -index 94a6d1207..f5fa4030c 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -@@ -250,18 +250,21 @@ class unspec_based_function_base : public function_base - public: - CONSTEXPR unspec_based_function_base (int unspec_for_sint, - int unspec_for_uint, -- int unspec_for_fp) -+ int unspec_for_fp, -+ unsigned int suffix_index = 0) - : m_unspec_for_sint (unspec_for_sint), - m_unspec_for_uint (unspec_for_uint), -- m_unspec_for_fp (unspec_for_fp) -+ m_unspec_for_fp (unspec_for_fp), -+ m_suffix_index (suffix_index) - {} - - /* Return the unspec code to use for INSTANCE, based on type suffix 0. */ - int - unspec_for (const function_instance &instance) const - { -- return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp -- : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint -+ auto &suffix = instance.type_suffix (m_suffix_index); -+ return (!suffix.integer_p ? m_unspec_for_fp -+ : suffix.unsigned_p ? m_unspec_for_uint - : m_unspec_for_sint); - } - -@@ -270,6 +273,9 @@ public: - int m_unspec_for_sint; - int m_unspec_for_uint; - int m_unspec_for_fp; -+ -+ /* Which type suffix is used to choose between the unspecs. */ -+ unsigned int m_suffix_index; - }; - - /* A function_base for functions that have an associated unspec code. 
-@@ -336,7 +342,8 @@ public: - rtx - expand (function_expander &e) const OVERRIDE - { -- return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0))); -+ return e.use_exact_insn (CODE (unspec_for (e), -+ e.vector_mode (m_suffix_index))); - } - }; - -@@ -395,16 +402,16 @@ public: - { - int unspec = unspec_for (e); - insn_code icode; -- if (e.type_suffix (0).float_p) -+ if (e.type_suffix (m_suffix_index).float_p) - { - /* Put the operands in the normal (fma ...) order, with the accumulator - last. This fits naturally since that's also the unprinted operand - in the asm output. */ - e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3); -- icode = code_for_aarch64_sve (unspec, e.vector_mode (0)); -+ icode = code_for_aarch64_sve (unspec, e.vector_mode (m_suffix_index)); - } - else -- icode = INT_CODE (unspec, e.vector_mode (0)); -+ icode = INT_CODE (unspec, e.vector_mode (m_suffix_index)); - return e.use_exact_insn (icode); - } - }; -@@ -430,16 +437,16 @@ public: - { - int unspec = unspec_for (e); - insn_code icode; -- if (e.type_suffix (0).float_p) -+ if (e.type_suffix (m_suffix_index).float_p) - { - /* Put the operands in the normal (fma ...) order, with the accumulator - last. This fits naturally since that's also the unprinted operand - in the asm output. */ - e.rotate_inputs_left (0, e.pred != PRED_none ? 
5 : 4); -- icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); -+ icode = code_for_aarch64_lane (unspec, e.vector_mode (m_suffix_index)); - } - else -- icode = INT_CODE (unspec, e.vector_mode (0)); -+ icode = INT_CODE (unspec, e.vector_mode (m_suffix_index)); - return e.use_exact_insn (icode); - } - }; --- -2.33.0 - diff --git a/0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch b/0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch deleted file mode 100644 index 75de62e..0000000 --- a/0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 3d721b42c97baba562b77988cec0fec229217519 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:28 +0000 -Subject: [PATCH 111/157] [Backport][SME] aarch64: Generalise _m rules for SVE - intrinsics - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8de9304d94d4ec42863a25c1cb1a1ba9a1e3e0fe - -In SVE there was a simple rule that unary merging (_m) intrinsics -had a separate initial argument to specify the values of inactive -lanes, whereas other merging functions took inactive lanes from -the first operand to the operation. - -That rule began to break down in SVE2, and it continues to do -so in SME. This patch therefore adds a virtual function to -specify whether the separate initial argument is present or not. -The old rule is still the default. - -gcc/ - * config/aarch64/aarch64-sve-builtins.h - (function_shape::has_merge_argument_p): New member function. - * config/aarch64/aarch64-sve-builtins.cc: - (function_resolver::check_gp_argument): Use it. - (function_expander::get_fallback_value): Likewise. - * config/aarch64/aarch64-sve-builtins-shapes.cc - (apply_predication): Likewise. - (unary_convert_narrowt_def::has_merge_argument_p): New function. 
---- - gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 10 ++++++++-- - gcc/config/aarch64/aarch64-sve-builtins.cc | 4 ++-- - gcc/config/aarch64/aarch64-sve-builtins.h | 13 +++++++++++++ - 3 files changed, 23 insertions(+), 4 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index 95e40d8f3..c536949ba 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -66,8 +66,8 @@ apply_predication (const function_instance &instance, tree return_type, - the same type as the result. For unary_convert_narrowt it also - provides the "bottom" half of active elements, and is present - for all types of predication. */ -- if ((argument_types.length () == 2 && instance.pred == PRED_m) -- || instance.shape == shapes::unary_convert_narrowt) -+ auto nargs = argument_types.length () - 1; -+ if (instance.shape->has_merge_argument_p (instance, nargs)) - argument_types.quick_insert (0, return_type); - } - } -@@ -3271,6 +3271,12 @@ SHAPE (unary_convert) - predicate. */ - struct unary_convert_narrowt_def : public overloaded_base<1> - { -+ bool -+ has_merge_argument_p (const function_instance &, unsigned int) const override -+ { -+ return true; -+ } -+ - void - build (function_builder &b, const function_group_info &group) const OVERRIDE - { -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 5f3a2baea..3441b4294 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -2287,7 +2287,7 @@ function_resolver::check_gp_argument (unsigned int nops, - if (pred != PRED_none) - { - /* Unary merge operations should use resolve_unary instead. 
*/ -- gcc_assert (nops != 1 || pred != PRED_m); -+ gcc_assert (!shape->has_merge_argument_p (*this, nops)); - nargs = nops + 1; - if (!check_num_arguments (nargs) - || !require_vector_type (i, VECTOR_TYPE_svbool_t)) -@@ -2931,7 +2931,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops, - - gcc_assert (pred == PRED_m || pred == PRED_x); - if (merge_argno == DEFAULT_MERGE_ARGNO) -- merge_argno = nops == 1 && pred == PRED_m ? 0 : 1; -+ merge_argno = shape->has_merge_argument_p (*this, nops) ? 0 : 1; - - if (merge_argno == 0) - return args[argno++]; -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index 7132b6e77..f16ac3947 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -710,6 +710,9 @@ public: - class function_shape - { - public: -+ virtual bool has_merge_argument_p (const function_instance &, -+ unsigned int) const; -+ - virtual bool explicit_type_suffix_p (unsigned int) const = 0; - - /* True if the group suffix is present in overloaded names. -@@ -982,6 +985,16 @@ function_base::vectors_per_tuple (const function_instance &instance) const - return instance.group_suffix ().vectors_per_tuple; - } - -+/* Return true if INSTANCE (which has NARGS arguments) has an initial -+ vector argument whose only purpose is to specify the values of -+ inactive lanes. */ -+inline bool -+function_shape::has_merge_argument_p (const function_instance &instance, -+ unsigned int nargs) const -+{ -+ return nargs == 1 && instance.pred == PRED_m; -+} -+ - /* Return the mode of the result of a call. 
*/ - inline machine_mode - function_expander::result_mode () const --- -2.33.0 - diff --git a/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch b/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch deleted file mode 100644 index b83e594..0000000 --- a/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch +++ /dev/null @@ -1,15955 +0,0 @@ -From 6c651a11f8e68244c4c53ad7b29983f54a3bc737 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:28 +0000 -Subject: [PATCH 112/157] [Backport][SME] aarch64: Add support for - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4f6ab9537051e156d52bd8e9df40107ba6685895 - -This adds support for the SME parts of arm_sme.h. - -gcc/ - * doc/invoke.texi: Document +sme-i16i64 and +sme-f64f64. - * config.gcc (aarch64*-*-*): Add arm_sme.h to the list of headers - to install and aarch64-sve-builtins-sme.o to the list of objects - to build. - * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define - or undefine TARGET_SME, TARGET_SME_I16I64 and TARGET_SME_F64F64. - (aarch64_pragma_aarch64): Handle arm_sme.h. - * config/aarch64/aarch64-option-extensions.def (sme-i16i64) - (sme-f64f64): New extensions. - * config/aarch64/aarch64-protos.h (aarch64_sme_vq_immediate) - (aarch64_addsvl_addspl_immediate_p, aarch64_output_addsvl_addspl) - (aarch64_output_sme_zero_za): Declare. - (aarch64_output_move_struct): Delete. - (aarch64_sme_ldr_vnum_offset): Declare. - (aarch64_sve::handle_arm_sme_h): Likewise. - * config/aarch64/aarch64.h (AARCH64_ISA_SM_ON): New macro. - (AARCH64_ISA_SME_I16I64, AARCH64_ISA_SME_F64F64): Likewise. - (TARGET_STREAMING, TARGET_STREAMING_SME): Likewise. - (TARGET_SME_I16I64, TARGET_SME_F64F64): Likewise. - * config/aarch64/aarch64.cc (aarch64_sve_rdvl_factor_p): Rename to... - (aarch64_sve_rdvl_addvl_factor_p): ...this. - (aarch64_sve_rdvl_immediate_p): Update accordingly. - (aarch64_rdsvl_immediate_p, aarch64_add_offset): Likewise. 
- (aarch64_sme_vq_immediate): Likewise. Make public. - (aarch64_sve_addpl_factor_p): New function. - (aarch64_sve_addvl_addpl_immediate_p): Use - aarch64_sve_rdvl_addvl_factor_p and aarch64_sve_addpl_factor_p. - (aarch64_addsvl_addspl_immediate_p): New function. - (aarch64_output_addsvl_addspl): Likewise. - (aarch64_cannot_force_const_mem): Return true for RDSVL immediates. - (aarch64_classify_index): Handle .Q scaling for VNx1TImode. - (aarch64_classify_address): Likewise for vnum offsets. - (aarch64_output_sme_zero_za): New function. - (aarch64_sme_ldr_vnum_offset_p): Likewise. - * config/aarch64/predicates.md (aarch64_addsvl_addspl_immediate): - New predicate. - (aarch64_pluslong_operand): Include it for SME. - * config/aarch64/constraints.md (Ucj, Uav): New constraints. - * config/aarch64/iterators.md (VNx1TI_ONLY): New mode iterator. - (SME_ZA_I, SME_ZA_SDI, SME_ZA_SDF_I, SME_MOP_BHI): Likewise. - (SME_MOP_HSDF): Likewise. - (UNSPEC_SME_ADDHA, UNSPEC_SME_ADDVA, UNSPEC_SME_FMOPA) - (UNSPEC_SME_FMOPS, UNSPEC_SME_LD1_HOR, UNSPEC_SME_LD1_VER) - (UNSPEC_SME_READ_HOR, UNSPEC_SME_READ_VER, UNSPEC_SME_SMOPA) - (UNSPEC_SME_SMOPS, UNSPEC_SME_ST1_HOR, UNSPEC_SME_ST1_VER) - (UNSPEC_SME_SUMOPA, UNSPEC_SME_SUMOPS, UNSPEC_SME_UMOPA) - (UNSPEC_SME_UMOPS, UNSPEC_SME_USMOPA, UNSPEC_SME_USMOPS) - (UNSPEC_SME_WRITE_HOR, UNSPEC_SME_WRITE_VER): New unspecs. - (elem_bits): Handle x2 and x4 structure modes, plus VNx1TI. - (Vetype, Vesize, VPRED): Handle VNx1TI. - (b): New mode attribute. - (SME_LD1, SME_READ, SME_ST1, SME_WRITE, SME_BINARY_SDI, SME_INT_MOP) - (SME_FP_MOP): New int iterators. - (optab): Handle SME unspecs. - (hv): New int attribute. - * config/aarch64/aarch64.md (*add3_aarch64): Handle ADDSVL - and ADDSPL. - * config/aarch64/aarch64-sme.md (UNSPEC_SME_LDR): New unspec. - (@aarch64_sme_, @aarch64_sme__plus) - (aarch64_sme_ldr0, @aarch64_sme_ldrn): New patterns. - (UNSPEC_SME_STR): New unspec. 
- (@aarch64_sme_, @aarch64_sme__plus) - (aarch64_sme_str0, @aarch64_sme_strn): New patterns. - (@aarch64_sme_): Likewise. - (*aarch64_sme__plus): Likewise. - (@aarch64_sme_): Likewise. - (@aarch64_sme_): Likewise. - (*aarch64_sme__plus): Likewise. - (@aarch64_sme_): Likewise. - (UNSPEC_SME_ZERO): New unspec. - (aarch64_sme_zero): New pattern. - (@aarch64_sme_): Likewise. - (@aarch64_sme_): Likewise. - (@aarch64_sme_): Likewise. - * config/aarch64/aarch64-sve-builtins.def: Add ZA type suffixes. - Include aarch64-sve-builtins-sme.def. - (DEF_SME_ZA_FUNCTION): New macro. - * config/aarch64/aarch64-sve-builtins.h (CP_READ_ZA): New call - property. - (CP_WRITE_ZA): Likewise. - (PRED_za_m): New predication type. - (type_suffix_index): Handle DEF_SME_ZA_SUFFIX. - (type_suffix_info): Add vector_p and za_p fields. - (function_instance::num_za_tiles): New member function. - (function_builder::get_attributes): Add an aarch64_feature_flags - argument. - (function_expander::get_contiguous_base): Take a base argument - number, a vnum argument number, and an argument that indicates - whether the vnum parameter is a factor of the SME vector length - or the prevailing vector length. - (function_expander::add_integer_operand): Take a poly_int64. - (sve_switcher::sve_switcher): Take a base set of flags. - (sme_switcher): New class. - (scalar_types): Add a null entry for NUM_VECTOR_TYPES. - * config/aarch64/aarch64-sve-builtins.cc: Include - aarch64-sve-builtins-sme.h. - (pred_suffixes): Add an entry for PRED_za_m. - (type_suffixes): Initialize vector_p and za_p. Handle ZA suffixes. - (TYPES_all_za, TYPES_d_za, TYPES_za_bhsd_data, TYPES_za_all_data) - (TYPES_za_s_integer, TYPES_za_d_integer, TYPES_mop_base) - (TYPES_mop_base_signed, TYPES_mop_base_unsigned, TYPES_mop_i16i64) - (TYPES_mop_i16i64_signed, TYPES_mop_i16i64_unsigned, TYPES_za): New - type suffix macros. - (preds_m, preds_za_m): New predication lists. - (function_groups): Handle DEF_SME_ZA_FUNCTION. 
- (scalar_types): Add an entry for NUM_VECTOR_TYPES. - (find_type_suffix_for_scalar_type): Check positively for vectors - rather than negatively for predicates. - (check_required_extensions): Handle PSTATE.SM and PSTATE.ZA - requirements. - (report_out_of_range): Handle the case where the minimum and - maximum are the same. - (function_instance::reads_global_state_p): Return true for functions - that read ZA. - (function_instance::modifies_global_state_p): Return true for functions - that write to ZA. - (sve_switcher::sve_switcher): Add a base flags argument. - (function_builder::get_name): Handle "__arm_" prefixes. - (add_attribute): Add an overload that takes a namespaces. - (add_shared_state_attribute): New function. - (function_builder::get_attributes): Take the required feature flags - as argument. Add streaming and ZA attributes where appropriate. - (function_builder::add_unique_function): Update calls accordingly. - (function_resolver::check_gp_argument): Assert that the predication - isn't ZA _m predication. - (function_checker::function_checker): Don't bias the argument - number for ZA _m predication. - (function_expander::get_contiguous_base): Add arguments that - specify the base argument number, the vnum argument number, - and an argument that indicates whether the vnum parameter is - a factor of the SME vector length or the prevailing vector length. - Handle the SME case. - (function_expander::add_input_operand): Handle pmode_register_operand. - (function_expander::add_integer_operand): Take a poly_int64. - (init_builtins): Call handle_arm_sme_h for LTO. - (handle_arm_sve_h): Skip SME intrinsics. - (handle_arm_sme_h): New function. - * config/aarch64/aarch64-sve-builtins-functions.h - (read_write_za, write_za): New classes. - (unspec_based_sme_function, za_arith_function): New using aliases. - (quiet_za_arith_function): Likewise. 
- * config/aarch64/aarch64-sve-builtins-shapes.h - (binary_za_int_m, binary_za_m, binary_za_uint_m, bool_inherent) - (inherent_za, inherent_mask_za, ldr_za, load_za, read_za_m, store_za) - (str_za, unary_za_m, write_za_m): Declare. - * config/aarch64/aarch64-sve-builtins-shapes.cc (apply_predication): - Expect za_m functions to have an existing governing predicate. - (binary_za_m_base, binary_za_int_m_def, binary_za_m_def): New classes. - (binary_za_uint_m_def, bool_inherent_def, inherent_za_def): Likewise. - (inherent_mask_za_def, ldr_za_def, load_za_def, read_za_m_def) - (store_za_def, str_za_def, unary_za_m_def, write_za_m_def): Likewise. - * config/aarch64/arm_sme.h: New file. - * config/aarch64/aarch64-sve-builtins-sme.h: Likewise. - * config/aarch64/aarch64-sve-builtins-sme.cc: Likewise. - * config/aarch64/aarch64-sve-builtins-sme.def: Likewise. - * config/aarch64/t-aarch64 (aarch64-sve-builtins.o): Depend on - aarch64-sve-builtins-sme.def and aarch64-sve-builtins-sme.h. - (aarch64-sve-builtins-sme.o): New rule. - -gcc/testsuite/ - * lib/target-supports.exp: Add sme and sme-i16i64 features. - * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test __ARM_FEATURE_SME* - macros. - * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h: Allow functions - to be marked as __arm_streaming, __arm_streaming_compatible, and - __arm_inout("za"). - * g++.target/aarch64/sve/acle/general-c++/func_redef_4.c: Mark the - function as __arm_streaming_compatible. - * g++.target/aarch64/sve/acle/general-c++/func_redef_5.c: Likewise. - * g++.target/aarch64/sve/acle/general-c++/func_redef_7.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/func_redef_4.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/func_redef_5.c: Likewise. - * g++.target/aarch64/sme/aarch64-sme-acle-asm.exp: New test harness. - * gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: Likewise. - * gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c: New test. 
- * gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c: Likewise. ---- - gcc/config.gcc | 4 +- - gcc/config/aarch64/aarch64-c.cc | 6 + - .../aarch64/aarch64-option-extensions.def | 4 + - gcc/config/aarch64/aarch64-protos.h | 8 +- - gcc/config/aarch64/aarch64-sme.md | 373 +++++++++++++++ - .../aarch64/aarch64-sve-builtins-functions.h | 64 +++ - .../aarch64/aarch64-sve-builtins-shapes.cc | 306 +++++++++++- - .../aarch64/aarch64-sve-builtins-shapes.h | 13 + - .../aarch64/aarch64-sve-builtins-sme.cc | 412 +++++++++++++++++ - .../aarch64/aarch64-sve-builtins-sme.def | 76 +++ - gcc/config/aarch64/aarch64-sve-builtins-sme.h | 57 +++ - gcc/config/aarch64/aarch64-sve-builtins.cc | 336 ++++++++++++-- - gcc/config/aarch64/aarch64-sve-builtins.def | 28 ++ - gcc/config/aarch64/aarch64-sve-builtins.h | 46 +- - gcc/config/aarch64/aarch64.cc | 140 +++++- - gcc/config/aarch64/aarch64.h | 15 + - gcc/config/aarch64/aarch64.md | 13 +- - gcc/config/aarch64/arm_sme.h | 45 ++ - gcc/config/aarch64/constraints.md | 9 + - gcc/config/aarch64/iterators.md | 94 +++- - gcc/config/aarch64/predicates.md | 8 +- - gcc/config/aarch64/t-aarch64 | 17 +- - gcc/doc/invoke.texi | 4 + - .../aarch64/sme/aarch64-sme-acle-asm.exp | 82 ++++ - .../sve/acle/general-c++/func_redef_4.c | 3 +- - .../sve/acle/general-c++/func_redef_5.c | 1 + - .../sve/acle/general-c++/func_redef_7.c | 1 + - .../gcc.target/aarch64/pragma_cpp_predefs_4.c | 38 ++ - .../aarch64/sme/aarch64-sme-acle-asm.exp | 81 ++++ - .../aarch64/sme/acle-asm/addha_za32.c | 48 ++ - .../aarch64/sme/acle-asm/addha_za64.c | 50 ++ - .../aarch64/sme/acle-asm/addva_za32.c | 48 ++ - 
.../aarch64/sme/acle-asm/addva_za64.c | 50 ++ - .../aarch64/sme/acle-asm/arm_has_sme_sc.c | 25 + - .../sme/acle-asm/arm_in_streaming_mode_ns.c | 11 + - .../sme/acle-asm/arm_in_streaming_mode_s.c | 11 + - .../sme/acle-asm/arm_in_streaming_mode_sc.c | 26 ++ - .../gcc.target/aarch64/sme/acle-asm/cntsb_s.c | 310 +++++++++++++ - .../aarch64/sme/acle-asm/cntsb_sc.c | 12 + - .../gcc.target/aarch64/sme/acle-asm/cntsd_s.c | 277 +++++++++++ - .../aarch64/sme/acle-asm/cntsd_sc.c | 13 + - .../gcc.target/aarch64/sme/acle-asm/cntsh_s.c | 279 +++++++++++ - .../aarch64/sme/acle-asm/cntsh_sc.c | 13 + - .../gcc.target/aarch64/sme/acle-asm/cntsw_s.c | 278 +++++++++++ - .../aarch64/sme/acle-asm/cntsw_sc.c | 13 + - .../aarch64/sme/acle-asm/ld1_hor_vnum_za128.c | 77 ++++ - .../aarch64/sme/acle-asm/ld1_hor_vnum_za16.c | 123 +++++ - .../aarch64/sme/acle-asm/ld1_hor_vnum_za32.c | 123 +++++ - .../aarch64/sme/acle-asm/ld1_hor_vnum_za64.c | 112 +++++ - .../aarch64/sme/acle-asm/ld1_hor_vnum_za8.c | 112 +++++ - .../aarch64/sme/acle-asm/ld1_hor_za128.c | 83 ++++ - .../aarch64/sme/acle-asm/ld1_hor_za16.c | 126 +++++ - .../aarch64/sme/acle-asm/ld1_hor_za32.c | 125 +++++ - .../aarch64/sme/acle-asm/ld1_hor_za64.c | 105 +++++ - .../aarch64/sme/acle-asm/ld1_hor_za8.c | 95 ++++ - .../aarch64/sme/acle-asm/ld1_ver_vnum_za128.c | 77 ++++ - .../aarch64/sme/acle-asm/ld1_ver_vnum_za16.c | 123 +++++ - .../aarch64/sme/acle-asm/ld1_ver_vnum_za32.c | 123 +++++ - .../aarch64/sme/acle-asm/ld1_ver_vnum_za64.c | 112 +++++ - .../aarch64/sme/acle-asm/ld1_ver_vnum_za8.c | 112 +++++ - .../aarch64/sme/acle-asm/ld1_ver_za128.c | 83 ++++ - .../aarch64/sme/acle-asm/ld1_ver_za16.c | 126 +++++ - .../aarch64/sme/acle-asm/ld1_ver_za32.c | 125 +++++ - .../aarch64/sme/acle-asm/ld1_ver_za64.c | 105 +++++ - .../aarch64/sme/acle-asm/ld1_ver_za8.c | 95 ++++ - .../aarch64/sme/acle-asm/ldr_vnum_za_s.c | 147 ++++++ - .../aarch64/sme/acle-asm/ldr_vnum_za_sc.c | 148 ++++++ - .../aarch64/sme/acle-asm/ldr_za_s.c | 124 +++++ - 
.../aarch64/sme/acle-asm/ldr_za_sc.c | 71 +++ - .../aarch64/sme/acle-asm/mopa_za32.c | 102 ++++ - .../aarch64/sme/acle-asm/mopa_za64.c | 70 +++ - .../aarch64/sme/acle-asm/mops_za32.c | 102 ++++ - .../aarch64/sme/acle-asm/mops_za64.c | 70 +++ - .../aarch64/sme/acle-asm/read_hor_za128.c | 435 ++++++++++++++++++ - .../aarch64/sme/acle-asm/read_hor_za16.c | 207 +++++++++ - .../aarch64/sme/acle-asm/read_hor_za32.c | 196 ++++++++ - .../aarch64/sme/acle-asm/read_hor_za64.c | 186 ++++++++ - .../aarch64/sme/acle-asm/read_hor_za8.c | 125 +++++ - .../aarch64/sme/acle-asm/read_ver_za128.c | 435 ++++++++++++++++++ - .../aarch64/sme/acle-asm/read_ver_za16.c | 207 +++++++++ - .../aarch64/sme/acle-asm/read_ver_za32.c | 196 ++++++++ - .../aarch64/sme/acle-asm/read_ver_za64.c | 186 ++++++++ - .../aarch64/sme/acle-asm/read_ver_za8.c | 125 +++++ - .../aarch64/sme/acle-asm/st1_hor_vnum_za128.c | 77 ++++ - .../aarch64/sme/acle-asm/st1_hor_vnum_za16.c | 123 +++++ - .../aarch64/sme/acle-asm/st1_hor_vnum_za32.c | 123 +++++ - .../aarch64/sme/acle-asm/st1_hor_vnum_za64.c | 112 +++++ - .../aarch64/sme/acle-asm/st1_hor_vnum_za8.c | 112 +++++ - .../aarch64/sme/acle-asm/st1_hor_za128.c | 83 ++++ - .../aarch64/sme/acle-asm/st1_hor_za16.c | 126 +++++ - .../aarch64/sme/acle-asm/st1_hor_za32.c | 125 +++++ - .../aarch64/sme/acle-asm/st1_hor_za64.c | 105 +++++ - .../aarch64/sme/acle-asm/st1_hor_za8.c | 95 ++++ - .../aarch64/sme/acle-asm/st1_ver_vnum_za128.c | 77 ++++ - .../aarch64/sme/acle-asm/st1_ver_vnum_za16.c | 123 +++++ - .../aarch64/sme/acle-asm/st1_ver_vnum_za32.c | 123 +++++ - .../aarch64/sme/acle-asm/st1_ver_vnum_za64.c | 112 +++++ - .../aarch64/sme/acle-asm/st1_ver_vnum_za8.c | 112 +++++ - .../aarch64/sme/acle-asm/st1_ver_za128.c | 83 ++++ - .../aarch64/sme/acle-asm/st1_ver_za16.c | 126 +++++ - .../aarch64/sme/acle-asm/st1_ver_za32.c | 125 +++++ - .../aarch64/sme/acle-asm/st1_ver_za64.c | 105 +++++ - .../aarch64/sme/acle-asm/st1_ver_za8.c | 95 ++++ - .../aarch64/sme/acle-asm/str_vnum_za_s.c 
| 147 ++++++ - .../aarch64/sme/acle-asm/str_vnum_za_sc.c | 148 ++++++ - .../aarch64/sme/acle-asm/str_za_s.c | 124 +++++ - .../aarch64/sme/acle-asm/str_za_sc.c | 71 +++ - .../aarch64/sme/acle-asm/sumopa_za32.c | 30 ++ - .../aarch64/sme/acle-asm/sumopa_za64.c | 32 ++ - .../aarch64/sme/acle-asm/sumops_za32.c | 30 ++ - .../aarch64/sme/acle-asm/sumops_za64.c | 32 ++ - .../aarch64/sme/acle-asm/test_sme_acle.h | 62 +++ - .../aarch64/sme/acle-asm/undef_za.c | 33 ++ - .../aarch64/sme/acle-asm/usmopa_za32.c | 30 ++ - .../aarch64/sme/acle-asm/usmopa_za64.c | 32 ++ - .../aarch64/sme/acle-asm/usmops_za32.c | 30 ++ - .../aarch64/sme/acle-asm/usmops_za64.c | 32 ++ - .../aarch64/sme/acle-asm/write_hor_za128.c | 193 ++++++++ - .../aarch64/sme/acle-asm/write_hor_za16.c | 133 ++++++ - .../aarch64/sme/acle-asm/write_hor_za32.c | 143 ++++++ - .../aarch64/sme/acle-asm/write_hor_za64.c | 133 ++++++ - .../aarch64/sme/acle-asm/write_hor_za8.c | 93 ++++ - .../aarch64/sme/acle-asm/write_ver_za128.c | 193 ++++++++ - .../aarch64/sme/acle-asm/write_ver_za16.c | 133 ++++++ - .../aarch64/sme/acle-asm/write_ver_za32.c | 143 ++++++ - .../aarch64/sme/acle-asm/write_ver_za64.c | 133 ++++++ - .../aarch64/sme/acle-asm/write_ver_za8.c | 93 ++++ - .../aarch64/sme/acle-asm/zero_mask_za.c | 130 ++++++ - .../gcc.target/aarch64/sme/acle-asm/zero_za.c | 11 + - .../aarch64/sve/acle/asm/test_sve_acle.h | 14 +- - .../sve/acle/general-c/binary_za_int_m_1.c | 50 ++ - .../sve/acle/general-c/binary_za_m_1.c | 49 ++ - .../sve/acle/general-c/binary_za_m_2.c | 11 + - .../sve/acle/general-c/binary_za_uint_m_1.c | 50 ++ - .../aarch64/sve/acle/general-c/func_redef_4.c | 3 +- - .../aarch64/sve/acle/general-c/func_redef_5.c | 1 + - .../aarch64/sve/acle/general-c/read_za_m_1.c | 48 ++ - .../aarch64/sve/acle/general-c/unary_za_m_1.c | 49 ++ - .../aarch64/sve/acle/general-c/write_za_m_1.c | 48 ++ - gcc/testsuite/lib/target-supports.exp | 3 +- - 140 files changed, 13816 insertions(+), 78 deletions(-) - create mode 100644 
gcc/config/aarch64/aarch64-sve-builtins-sme.cc - create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.def - create mode 100644 gcc/config/aarch64/aarch64-sve-builtins-sme.h - create mode 100644 gcc/config/aarch64/arm_sme.h - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c - create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c - create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c - 
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c - create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c - -diff --git a/gcc/config.gcc b/gcc/config.gcc -index da66603cd..19b21a280 100644 ---- a/gcc/config.gcc -+++ b/gcc/config.gcc -@@ -325,11 +325,11 @@ m32c*-*-*) - ;; - aarch64*-*-*) - cpu_type=aarch64 -- extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h" -+ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h" - c_target_objs="aarch64-c.o" - cxx_target_objs="aarch64-c.o" - d_target_objs="aarch64-d.o" -- extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o cortex-a57-fma-steering.o 
aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o" -+ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o aarch64-cc-fusion.o" - target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" - target_has_targetm_common=yes - ;; -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 76c20848f..cb8a6c2fc 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -250,6 +250,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - "__ARM_FEATURE_LS64", pfile); - aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile); - -+ aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile); -+ aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile); -+ aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile); -+ - /* Not for ACLE, but required to keep "float.h" correct if we switch - target between implementations that do or do not support ARMv8.2-A - 16-bit floating-point extensions. 
*/ -@@ -347,6 +351,8 @@ aarch64_pragma_aarch64 (cpp_reader *) - const char *name = TREE_STRING_POINTER (x); - if (strcmp (name, "arm_sve.h") == 0) - aarch64_sve::handle_arm_sve_h (); -+ else if (strcmp (name, "arm_sme.h") == 0) -+ aarch64_sve::handle_arm_sme_h (); - else if (strcmp (name, "arm_neon.h") == 0) - handle_arm_neon_h (); - else if (strcmp (name, "arm_acle.h") == 0) -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index faee64a79..98854dbce 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -151,4 +151,8 @@ AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "") - - AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") - -+AARCH64_OPT_EXTENSION("sme-i16i64", SME_I16I64, (SME), (), (), "") -+ -+AARCH64_OPT_EXTENSION("sme-f64f64", SME_F64F64, (SME), (), (), "") -+ - #undef AARCH64_OPT_EXTENSION -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 0883ddd1a..81900fa83 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -809,7 +809,11 @@ bool aarch64_sve_vector_inc_dec_immediate_p (rtx); - int aarch64_add_offset_temporaries (rtx); - void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); - bool aarch64_rdsvl_immediate_p (const_rtx); -+rtx aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT, -+ aarch64_feature_flags); - char *aarch64_output_rdsvl (const_rtx); -+bool aarch64_addsvl_addspl_immediate_p (const_rtx); -+char *aarch64_output_addsvl_addspl (rtx); - bool aarch64_mov_operand_p (rtx, machine_mode); - rtx aarch64_reverse_mask (machine_mode, unsigned int); - bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); -@@ -853,6 +857,7 @@ int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &); - bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT); - bool aarch64_use_return_insn_p 
(void); - const char *aarch64_output_casesi (rtx *); -+const char *aarch64_output_sme_zero_za (rtx); - - arm_pcs aarch64_tlsdesc_abi_id (); - enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); -@@ -867,7 +872,6 @@ int aarch64_uxt_size (int, HOST_WIDE_INT); - int aarch64_vec_fpconst_pow_of_2 (rtx); - rtx aarch64_eh_return_handler_rtx (void); - rtx aarch64_mask_from_zextract_ops (rtx, rtx); --const char *aarch64_output_move_struct (rtx *operands); - rtx aarch64_return_addr_rtx (void); - rtx aarch64_return_addr (int, rtx); - rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); -@@ -881,6 +885,7 @@ bool aarch64_sve_ldnf1_operand_p (rtx); - bool aarch64_sve_ldr_operand_p (rtx); - bool aarch64_sve_prefetch_operand_p (rtx, machine_mode); - bool aarch64_sve_struct_memory_operand_p (rtx); -+bool aarch64_sme_ldr_vnum_offset_p (rtx, rtx); - rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); - rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); - bool aarch64_stepped_int_parallel_p (rtx, int); -@@ -1000,6 +1005,7 @@ void handle_arm_neon_h (void); - namespace aarch64_sve { - void init_builtins (); - void handle_arm_sve_h (); -+ void handle_arm_sme_h (); - tree builtin_decl (unsigned, bool); - bool builtin_type_p (const_tree); - bool builtin_type_p (const_tree, unsigned int *, unsigned int *); -diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md -index d4973098e..da0745f65 100644 ---- a/gcc/config/aarch64/aarch64-sme.md -+++ b/gcc/config/aarch64/aarch64-sme.md -@@ -24,6 +24,19 @@ - ;; ---- Test current state - ;; ---- PSTATE.SM management - ;; ---- PSTATE.ZA management -+;; -+;; == Loads, stores and moves -+;; ---- Single-vector loads -+;; ---- Single-vector stores -+;; ---- Single-vector moves -+;; ---- Zeroing -+;; -+;; == Binary arithmetic -+;; ---- Binary arithmetic on ZA tile -+;; -+;; == Ternary arithmetic -+;; ---- [INT] Sum of outer products -+;; ---- [FP] Sum of outer products - - 
;; ========================================================================= - ;; == State management -@@ -456,3 +469,363 @@ - DONE; - } - ) -+ -+;; ========================================================================= -+;; == Loads, stores and moves -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Single-vector loads -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - LD1 -+;; - LDR -+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [ -+ UNSPEC_SME_LDR -+]) -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg:SME_ZA_I ZA_REGNUM) -+ (unspec:SME_ZA_I -+ [(reg:SME_ZA_I ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand:SI 1 "register_operand" "Ucj") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")] -+ SME_LD1))] -+ "TARGET_STREAMING_SME" -+ "ld1\t{ za%0.[%w1, 0] }, %2/z, %3" -+) -+ -+(define_insn "@aarch64_sme__plus" -+ [(set (reg:SME_ZA_I ZA_REGNUM) -+ (unspec:SME_ZA_I -+ [(reg:SME_ZA_I ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (plus:SI (match_operand:SI 1 "register_operand" "Ucj") -+ (match_operand:SI 2 "const_int_operand")) -+ (match_operand: 3 "register_operand" "Upl") -+ (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")] -+ SME_LD1))] -+ "TARGET_STREAMING_SME -+ && UINTVAL (operands[2]) < 128 / " -+ "ld1\t{ za%0.[%w1, %2] }, %3/z, %4" -+) -+ -+(define_insn "aarch64_sme_ldr0" -+ [(set (reg:VNx16QI ZA_REGNUM) -+ (unspec:VNx16QI -+ [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:SI 0 "register_operand" "Ucj") -+ (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))] -+ UNSPEC_SME_LDR))] -+ "TARGET_SME" -+ "ldr\tza[%w0, 0], [%1, #0, mul vl]" -+) 
-+ -+(define_insn "@aarch64_sme_ldrn" -+ [(set (reg:VNx16QI ZA_REGNUM) -+ (unspec:VNx16QI -+ [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (plus:SI (match_operand:SI 0 "register_operand" "Ucj") -+ (match_operand:SI 1 "const_int_operand")) -+ (mem:VNx16QI -+ (plus:P (match_operand:P 2 "register_operand" "rk") -+ (match_operand:P 3 "aarch64_mov_operand")))] -+ UNSPEC_SME_LDR))] -+ "TARGET_SME -+ && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])" -+ "ldr\tza[%w0, %1], [%2, #%1, mul vl]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Single-vector stores -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ST1 -+;; - STR -+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [ -+ UNSPEC_SME_STR -+]) -+ -+(define_insn "@aarch64_sme_" -+ [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") -+ (unspec:SME_ZA_I -+ [(reg:SME_ZA_I ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_dup 0) -+ (match_operand:DI 1 "const_int_operand") -+ (match_operand:SI 2 "register_operand" "Ucj") -+ (match_operand: 3 "register_operand" "Upl")] -+ SME_ST1))] -+ "TARGET_STREAMING_SME" -+ "st1\t{ za%1.[%w2, 0] }, %3, %0" -+) -+ -+(define_insn "@aarch64_sme__plus" -+ [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf") -+ (unspec:SME_ZA_I -+ [(reg:SME_ZA_I ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_dup 0) -+ (match_operand:DI 1 "const_int_operand") -+ (plus:SI (match_operand:SI 2 "register_operand" "Ucj") -+ (match_operand:SI 3 "const_int_operand")) -+ (match_operand: 4 "register_operand" "Upl")] -+ SME_ST1))] -+ "TARGET_STREAMING_SME -+ && UINTVAL (operands[3]) < 128 / " -+ "st1\t{ za%1.[%w2, %3] }, %4, %0" -+) -+ -+(define_insn "aarch64_sme_str0" -+ [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk")) -+ (unspec:VNx16QI -+ [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) 
-+ (mem:VNx16QI (match_dup 1)) -+ (match_operand:SI 0 "register_operand" "Ucj")] -+ UNSPEC_SME_STR))] -+ "TARGET_SME" -+ "str\tza[%w0, 0], [%1, #0, mul vl]" -+) -+ -+(define_insn "@aarch64_sme_strn" -+ [(set (mem:VNx16QI -+ (plus:P (match_operand:P 2 "register_operand" "rk") -+ (match_operand:P 3 "aarch64_mov_operand"))) -+ (unspec:VNx16QI -+ [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3))) -+ (plus:SI (match_operand:SI 0 "register_operand" "Ucj") -+ (match_operand:SI 1 "const_int_operand"))] -+ UNSPEC_SME_STR))] -+ "TARGET_SME -+ && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])" -+ "str\tza[%w0, %1], [%2, #%1, mul vl]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Single-vector moves -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - MOVA -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sme_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(reg: ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:SVE_FULL 1 "register_operand" "0") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:SI 4 "register_operand" "Ucj")] -+ SME_READ))] -+ "TARGET_STREAMING_SME" -+ "mova\t%0., %2/m, za%3.[%w4, 0]" -+) -+ -+(define_insn "*aarch64_sme__plus" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(reg: ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:SVE_FULL 1 "register_operand" "0") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:DI 3 "const_int_operand") -+ (plus:SI (match_operand:SI 4 "register_operand" "Ucj") -+ (match_operand:SI 5 "const_int_operand"))] -+ SME_READ))] -+ "TARGET_STREAMING_SME -+ && UINTVAL (operands[5]) < 128 / " -+ "mova\t%0., %2/m, za%3.[%w4, %5]" -+) -+ -+(define_insn 
"@aarch64_sme_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(reg:VNx1TI_ONLY ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:SVE_FULL 1 "register_operand" "0") -+ (match_operand:VNx2BI 2 "register_operand" "Upl") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:SI 4 "register_operand" "Ucj")] -+ SME_READ))] -+ "TARGET_STREAMING_SME" -+ "mova\t%0.q, %2/m, za%3.q[%w4, 0]" -+) -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg: ZA_REGNUM) -+ (unspec: -+ [(reg:SVE_FULL ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand:SI 1 "register_operand" "Ucj") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 3 "register_operand" "w")] -+ SME_WRITE))] -+ "TARGET_STREAMING_SME" -+ "mova\tza%0.[%w1, 0], %2/m, %3." -+) -+ -+(define_insn "*aarch64_sme__plus" -+ [(set (reg: ZA_REGNUM) -+ (unspec: -+ [(reg:SVE_FULL ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (plus:SI (match_operand:SI 1 "register_operand" "Ucj") -+ (match_operand:SI 2 "const_int_operand")) -+ (match_operand: 3 "register_operand" "Upl") -+ (match_operand:SVE_FULL 4 "register_operand" "w")] -+ SME_WRITE))] -+ "TARGET_STREAMING_SME -+ && UINTVAL (operands[2]) < 128 / " -+ "mova\tza%0.[%w1, %2], %3/m, %4." 
-+) -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg:VNx1TI_ONLY ZA_REGNUM) -+ (unspec:VNx1TI_ONLY -+ [(reg:VNx1TI_ONLY ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand:SI 1 "register_operand" "Ucj") -+ (match_operand:VNx2BI 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 3 "register_operand" "w")] -+ SME_WRITE))] -+ "TARGET_STREAMING_SME" -+ "mova\tza%0.q[%w1, 0], %2/m, %3.q" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Zeroing -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ZERO -+;; ------------------------------------------------------------------------- -+ -+(define_c_enum "unspec" [UNSPEC_SME_ZERO]) -+ -+(define_insn "aarch64_sme_zero_za" -+ [(set (reg:VNx16QI ZA_REGNUM) -+ (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand")] -+ UNSPEC_SME_ZERO))] -+ "TARGET_SME" -+ { -+ return aarch64_output_sme_zero_za (operands[0]); -+ } -+) -+ -+;; ========================================================================= -+;; == Binary arithmetic -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Binary arithmetic on ZA tile -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ADDHA -+;; - ADDVA -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg:SME_ZA_SDI ZA_REGNUM) -+ (unspec:SME_ZA_SDI -+ [(reg:SME_ZA_SDI ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand: 1 "register_operand" "Upl") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:SME_ZA_SDI 3 "register_operand" "w")] -+ SME_BINARY_SDI))] -+ "TARGET_STREAMING_SME" -+ "\tza%0., %1/m, %2/m, 
%3." -+) -+ -+;; ========================================================================= -+;; == Ternary arithmetic -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Sum of outer products -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SMOPA -+;; - SMOPS -+;; - SUMOPA -+;; - SUMOPS -+;; - UMOPA -+;; - UMOPS -+;; - USMOPA -+;; - USMOPS -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg:VNx4SI_ONLY ZA_REGNUM) -+ (unspec:VNx4SI_ONLY -+ [(reg:VNx4SI_ONLY ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand: 1 "register_operand" "Upl") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:VNx16QI_ONLY 3 "register_operand" "w") -+ (match_operand:VNx16QI_ONLY 4 "register_operand" "w")] -+ SME_INT_MOP))] -+ "TARGET_STREAMING_SME" -+ "\tza%0.s, %1/m, %2/m, %3.b, %4.b" -+) -+ -+(define_insn "@aarch64_sme_" -+ [(set (reg:VNx2DI_ONLY ZA_REGNUM) -+ (unspec:VNx2DI_ONLY -+ [(reg:VNx2DI_ONLY ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand: 1 "register_operand" "Upl") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:VNx8HI_ONLY 3 "register_operand" "w") -+ (match_operand:VNx8HI_ONLY 4 "register_operand" "w")] -+ SME_INT_MOP))] -+ "TARGET_STREAMING_SME && TARGET_SME_I16I64" -+ "\tza%0.d, %1/m, %2/m, %3.h, %4.h" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Sum of outer products -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BFMOPA -+;; - BFMOPS -+;; - FMOPA -+;; - FMOPS -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sme_" -+ [(set 
(reg:SME_ZA_SDF_I ZA_REGNUM) -+ (unspec:SME_ZA_SDF_I -+ [(reg:SME_ZA_SDF_I ZA_REGNUM) -+ (reg:DI SME_STATE_REGNUM) -+ (match_operand:DI 0 "const_int_operand") -+ (match_operand: 1 "register_operand" "Upl") -+ (match_operand: 2 "register_operand" "Upl") -+ (match_operand:SME_MOP_HSDF 3 "register_operand" "w") -+ (match_operand:SME_MOP_HSDF 4 "register_operand" "w")] -+ SME_FP_MOP))] -+ "TARGET_STREAMING_SME -+ && ( == 32) == ( <= 32)" -+ "\tza%0., %1/m, %2/m, %3., %4." -+) -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -index f5fa4030c..9dfce5c0e 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -@@ -50,6 +50,27 @@ public: - } - }; - -+/* Wrap T, which is derived from function_base, and indicate that it -+ additionally has the call properties in PROPERTIES. */ -+template -+class add_call_properties : public T -+{ -+public: -+ using T::T; -+ -+ unsigned int -+ call_properties (const function_instance &fi) const override -+ { -+ return T::call_properties (fi) | PROPERTIES; -+ } -+}; -+ -+template -+using read_write_za = add_call_properties; -+ -+template -+using write_za = add_call_properties; -+ - /* A function_base that sometimes or always operates on tuples of - vectors. */ - class multi_vector_function : public function_base -@@ -383,6 +404,49 @@ typedef unspec_based_function_exact_insn - typedef unspec_based_function_exact_insn - unspec_based_sub_lane_function; - -+/* General SME unspec-based functions, parameterized on the vector mode. 
*/ -+class sme_1mode_function : public read_write_za -+{ -+public: -+ using parent = read_write_za; -+ -+ CONSTEXPR sme_1mode_function (int unspec_for_sint, int unspec_for_uint, -+ int unspec_for_fp) -+ : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) -+ {} -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ auto icode = code_for_aarch64_sme (unspec_for (e), e.tuple_mode (1)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* General SME unspec-based functions, parameterized on both the ZA mode -+ and the vector mode. */ -+template -+class sme_2mode_function_t : public read_write_za -+{ -+public: -+ using parent = read_write_za; -+ -+ CONSTEXPR sme_2mode_function_t (int unspec_for_sint, int unspec_for_uint, -+ int unspec_for_fp) -+ : parent (unspec_for_sint, unspec_for_uint, unspec_for_fp, 1) -+ {} -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ insn_code icode = CODE (unspec_for (e), e.vector_mode (0), -+ e.tuple_mode (1)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+using sme_2mode_function = sme_2mode_function_t; -+ - /* A function that acts like unspec_based_function_exact_insn - when operating on integers, but that expands to an (fma ...)-style - aarch64_sve* operation when applied to floats. */ -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -index c536949ba..bdde849c8 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -59,7 +59,10 @@ static void - apply_predication (const function_instance &instance, tree return_type, - vec &argument_types) - { -- if (instance.pred != PRED_none) -+ /* There are currently no SME ZA instructions that have both merging and -+ unpredicated forms, so for simplicity, the predicates are always included -+ in the original format string. 
*/ -+ if (instance.pred != PRED_none && instance.pred != PRED_za_m) - { - argument_types.quick_insert (0, get_svbool_t ()); - /* For unary merge operations, the first argument is a vector with -@@ -589,6 +592,33 @@ struct binary_imm_long_base : public overloaded_base<0> - } - }; - -+/* Base class for binary_za_m and similar shapes. */ -+template -+struct binary_za_m_base : public overloaded_base<1> -+{ -+ tree -+ resolve (function_resolver &r) const override -+ { -+ type_suffix_index type; -+ if (!r.check_num_arguments (5) -+ || !r.require_integer_immediate (0) -+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) -+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) -+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (4, 3, type, TCLASS, BITS)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); -+ } -+}; -+ - /* Base class for inc_dec and inc_dec_pat. */ - struct inc_dec_base : public overloaded_base<0> - { -@@ -1576,6 +1606,68 @@ struct binary_wide_opt_n_def : public overloaded_base<0> - }; - SHAPE (binary_wide_opt_n) - -+/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, svx_t, -+ svx_t) -+ -+ where the first argument is a ZA tile. */ -+struct binary_za_int_m_def : public binary_za_m_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "_,su64,vp,vp,t1,ts1", group, MODE_none); -+ } -+}; -+SHAPE (binary_za_int_m) -+ -+/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, svx_t, -+ svx_t) -+ -+ where the first argument is a ZA tile. 
*/ -+struct binary_za_m_def : public binary_za_m_base<> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ /* Allow the overloaded form to be specified seperately, with just -+ a single suffix. This is necessary for the 64-bit SME MOP intrinsics, -+ which have some forms dependent on FEAT_SME_I16I64 and some forms -+ dependent on FEAT_SME_F64F64. The resolver needs to be defined -+ for base SME. */ -+ if (group.types[0][1] != NUM_TYPE_SUFFIXES) -+ build_all (b, "_,su64,vp,vp,t1,t1", group, MODE_none); -+ } -+}; -+SHAPE (binary_za_m) -+ -+/* void svfoo_t0[_t1]_g(uint64_t, svbool_t, svbool_t, svx_t, -+ svx_t) -+ -+ where the first argument is a ZA tile. */ -+struct binary_za_uint_m_def : public binary_za_m_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "_,su64,vp,vp,t1,tu1", group, MODE_none); -+ } -+}; -+SHAPE (binary_za_uint_m) -+ -+/* bool svfoo(). */ -+struct bool_inherent_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "sp", group, MODE_none); -+ } -+}; -+SHAPE (bool_inherent) -+ - /* sv_t svfoo[_t0](sv_t, sv_t) - _t svfoo[_n_t0](_t, sv_t). */ - struct clast_def : public overloaded_base<0> -@@ -2055,6 +2147,51 @@ struct inherent_b_def : public overloaded_base<0> - }; - SHAPE (inherent_b) - -+/* void svfoo_t0(). */ -+struct inherent_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_", group, MODE_none); -+ } -+}; -+SHAPE (inherent_za) -+ -+/* void svfoo_t0(uint64_t) -+ -+ where the argument is an integer constant that specifies an 8-bit mask. 
*/ -+struct inherent_mask_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_,su64", group, MODE_none); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, 255); -+ } -+}; -+SHAPE (inherent_mask_za) -+ -+/* void svfoo_t0(uint32_t, const void *) -+ void svfoo_vnum_t0(uint32_t, const void *, int64_t) -+ -+ where the first argument is a variable ZA slice. */ -+struct ldr_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_,su32,al", group, MODE_none); -+ build_all (b, "_,su32,al,ss64", group, MODE_vnum); -+ } -+}; -+SHAPE (ldr_za) -+ - /* sv[xN]_t svfoo[_t0](const _t *) - sv[xN]_t svfoo_vnum[_t0](const _t *, int64_t). */ - struct load_def : public load_contiguous_base -@@ -2265,6 +2402,27 @@ struct load_replicate_def : public load_contiguous_base - }; - SHAPE (load_replicate) - -+/* void svfoo_t0(uint64_t, uint32_t, svbool_t, const void *) -+ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, const void *, int64_t) -+ -+ where the first two fields form a (ZA tile, slice) pair. */ -+struct load_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_,su64,su32,vp,al", group, MODE_none); -+ build_all (b, "_,su64,su32,vp,al,ss64", group, MODE_vnum); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); -+ } -+}; -+SHAPE (load_za) -+ - /* svbool_t svfoo(enum svpattern). */ - struct pattern_pred_def : public nonoverloaded_base - { -@@ -2359,6 +2517,48 @@ struct rdffr_def : public nonoverloaded_base - }; - SHAPE (rdffr) - -+/* sv_t svfoo_t0[_t1](uint64_t, uint32_t) -+ -+ where the first two fields form a (ZA tile, slice) pair. 
*/ -+struct read_za_m_def : public overloaded_base<1> -+{ -+ bool -+ has_merge_argument_p (const function_instance &, unsigned int) const override -+ { -+ return true; -+ } -+ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "t1,su64,su32", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const override -+ { -+ gcc_assert (r.pred == PRED_m); -+ type_suffix_index type; -+ if (!r.check_num_arguments (4) -+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES -+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) -+ || !r.require_integer_immediate (2) -+ || !r.require_scalar_type (3, "uint32_t")) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ gcc_assert (c.pred == PRED_m); -+ return c.require_immediate_range (1, 0, c.num_za_tiles () - 1); -+ } -+}; -+SHAPE (read_za_m) -+ - /* _t svfoo[_t0](sv_t). */ - struct reduction_def : public overloaded_base<0> - { -@@ -2727,6 +2927,42 @@ struct store_scatter_offset_restricted_def : public store_scatter_base - }; - SHAPE (store_scatter_offset_restricted) - -+/* void svfoo_t0(uint64_t, uint32_t, svbool_t, void *) -+ void svfoo_vnum_t0(uint64_t, uint32_t, svbool_t, void *, int64_t) -+ -+ where the first two fields form a (ZA tile, slice) pair. 
*/ -+struct store_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_,su64,su32,vp,as", group, MODE_none); -+ build_all (b, "_,su64,su32,vp,as,ss64", group, MODE_vnum); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); -+ } -+}; -+SHAPE (store_za) -+ -+/* void svfoo_t0(uint32_t, void *) -+ void svfoo_vnum_t0(uint32_t, void *, int64_t) -+ -+ where the first argument is a variable ZA slice. */ -+struct str_za_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ build_all (b, "_,su32,as", group, MODE_none); -+ build_all (b, "_,su32,as,ss64", group, MODE_vnum); -+ } -+}; -+SHAPE (str_za) -+ - /* sv_t svfoo[_t0](svxN_t, sv_t). */ - struct tbl_tuple_def : public overloaded_base<0> - { -@@ -3487,4 +3723,72 @@ struct unary_widen_def : public overloaded_base<0> - }; - SHAPE (unary_widen) - -+/* void svfoo_t0[_t1](uint64_t, svbool_t, svbool_t, sv_t) -+ -+ where the first argument is a ZA tile. 
*/ -+struct unary_za_m_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "_,su64,vp,vp,t1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const override -+ { -+ type_suffix_index type; -+ if (!r.check_num_arguments (4) -+ || !r.require_integer_immediate (0) -+ || !r.require_vector_type (1, VECTOR_TYPE_svbool_t) -+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) -+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); -+ } -+}; -+SHAPE (unary_za_m) -+ -+/* void svfoo_t0[_t1](uint64_t, uint32_t, svbool_t, sv_t) -+ -+ where the first two fields form a (ZA tile, slice) pair. */ -+struct write_za_m_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const override -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "_,su64,su32,vp,t1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const override -+ { -+ type_suffix_index type; -+ if (!r.check_num_arguments (4) -+ || !r.require_integer_immediate (0) -+ || !r.require_scalar_type (1, "uint32_t") -+ || !r.require_vector_type (2, VECTOR_TYPE_svbool_t) -+ || (type = r.infer_vector_type (3)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); -+ } -+ -+ bool -+ check (function_checker &c) const override -+ { -+ return c.require_immediate_range (0, 0, c.num_za_tiles () - 1); -+ } -+}; -+SHAPE (write_za_m) -+ - } -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -index 2b06152d4..9c1f44bdc 
100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -@@ -93,6 +93,10 @@ namespace aarch64_sve - extern const function_shape *const binary_uint64_opt_n; - extern const function_shape *const binary_wide; - extern const function_shape *const binary_wide_opt_n; -+ extern const function_shape *const binary_za_int_m; -+ extern const function_shape *const binary_za_m; -+ extern const function_shape *const binary_za_uint_m; -+ extern const function_shape *const bool_inherent; - extern const function_shape *const clast; - extern const function_shape *const compare; - extern const function_shape *const compare_opt_n; -@@ -114,6 +118,9 @@ namespace aarch64_sve - extern const function_shape *const inc_dec_pred_scalar; - extern const function_shape *const inherent; - extern const function_shape *const inherent_b; -+ extern const function_shape *const inherent_za; -+ extern const function_shape *const inherent_mask_za; -+ extern const function_shape *const ldr_za; - extern const function_shape *const load; - extern const function_shape *const load_ext; - extern const function_shape *const load_ext_gather_index; -@@ -124,6 +131,7 @@ namespace aarch64_sve - extern const function_shape *const load_gather_sv_restricted; - extern const function_shape *const load_gather_vs; - extern const function_shape *const load_replicate; -+ extern const function_shape *const load_za; - extern const function_shape *const mmla; - extern const function_shape *const pattern_pred; - extern const function_shape *const prefetch; -@@ -131,6 +139,7 @@ namespace aarch64_sve - extern const function_shape *const prefetch_gather_offset; - extern const function_shape *const ptest; - extern const function_shape *const rdffr; -+ extern const function_shape *const read_za_m; - extern const function_shape *const reduction; - extern const function_shape *const reduction_wide; - extern const function_shape *const reinterpret; -@@ -148,6 +157,8 @@ 
namespace aarch64_sve - extern const function_shape *const store_scatter_index_restricted; - extern const function_shape *const store_scatter_offset; - extern const function_shape *const store_scatter_offset_restricted; -+ extern const function_shape *const store_za; -+ extern const function_shape *const str_za; - extern const function_shape *const tbl_tuple; - extern const function_shape *const ternary_bfloat; - extern const function_shape *const ternary_bfloat_lane; -@@ -186,6 +197,8 @@ namespace aarch64_sve - extern const function_shape *const unary_to_uint; - extern const function_shape *const unary_uint; - extern const function_shape *const unary_widen; -+ extern const function_shape *const unary_za_m; -+ extern const function_shape *const write_za_m; - } - } - -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc -new file mode 100644 -index 000000000..e1df6ce0d ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc -@@ -0,0 +1,412 @@ -+/* ACLE support for AArch64 SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "tree.h" -+#include "rtl.h" -+#include "tm_p.h" -+#include "memmodel.h" -+#include "insn-codes.h" -+#include "optabs.h" -+#include "recog.h" -+#include "expr.h" -+#include "basic-block.h" -+#include "function.h" -+#include "fold-const.h" -+#include "gimple.h" -+#include "gimple-iterator.h" -+#include "gimplify.h" -+#include "explow.h" -+#include "emit-rtl.h" -+#include "aarch64-sve-builtins.h" -+#include "aarch64-sve-builtins-shapes.h" -+#include "aarch64-sve-builtins-base.h" -+#include "aarch64-sve-builtins-sme.h" -+#include "aarch64-sve-builtins-functions.h" -+ -+using namespace aarch64_sve; -+ -+namespace { -+ -+class load_store_za_base : public function_base -+{ -+public: -+ tree -+ memory_scalar_type (const function_instance &) const override -+ { -+ return void_type_node; -+ } -+}; -+ -+class read_write_za_base : public function_base -+{ -+public: -+ constexpr read_write_za_base (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ auto za_mode = e.vector_mode (0); -+ auto z_mode = e.vector_mode (1); -+ auto icode = (za_mode == VNx1TImode -+ ? code_for_aarch64_sme (m_unspec, za_mode, z_mode) -+ : code_for_aarch64_sme (m_unspec, z_mode, z_mode)); -+ return e.use_exact_insn (icode); -+ } -+ -+ int m_unspec; -+}; -+ -+using load_za_base = add_call_properties; -+ -+using store_za_base = add_call_properties; -+ -+/* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE. -+ The intrinsic has a vnum parameter at index ARGNO. Return true if the -+ vnum argument is a constant that is a valid ZA offset for the underlying -+ instruction. 
*/ -+ -+static bool -+has_in_range_vnum_arg (function_expander &e, machine_mode mem_mode, -+ unsigned int argno) -+{ -+ return (e.mode_suffix_id == MODE_vnum -+ && CONST_INT_P (e.args[argno]) -+ && UINTVAL (e.args[argno]) < 16 / GET_MODE_UNIT_SIZE (mem_mode)); -+} -+ -+/* E is a ZA load or store intrinsic that uses instruction ICODE. Add a -+ 32-bit operand that gives the total ZA slice. (The instruction hard-codes -+ the constant offset to 0, so there is no operand for that.) -+ -+ Argument ARGNO is the intrinsic's slice argument. If the intrinsic is -+ a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand, -+ which must be added to the slice argument. */ -+ -+static void -+add_load_store_slice_operand (function_expander &e, insn_code icode, -+ unsigned int argno, unsigned int vnum_argno) -+{ -+ rtx base = e.args[argno]; -+ if (e.mode_suffix_id == MODE_vnum) -+ { -+ rtx vnum = lowpart_subreg (SImode, e.args[vnum_argno], DImode); -+ base = simplify_gen_binary (PLUS, SImode, base, vnum); -+ } -+ e.add_input_operand (icode, base); -+} -+ -+/* Add a memory operand for ZA LD1 or ST1 intrinsic E. BASE_ARGNO is -+ the index of the base argument. */ -+ -+static void -+add_load_store_operand (function_expander &e, unsigned int base_argno) -+{ -+ auto mode = e.vector_mode (0); -+ rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1, -+ AARCH64_FL_SM_ON); -+ auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base)); -+ set_mem_align (mem, BITS_PER_UNIT); -+ e.add_fixed_operand (mem); -+} -+ -+/* Expand ZA LDR or STR intrinsic E. There are two underlying instructions: -+ -+ - BASE_CODE has a zero ZA slice offset -+ - VNUM_CODE has a constant operand for the ZA slice offset. 
*/ -+ -+static rtx -+expand_ldr_str_za (function_expander &e, insn_code base_code, -+ insn_code vnum_code) -+{ -+ if (has_in_range_vnum_arg (e, VNx16QImode, 2)) -+ { -+ rtx mem_offset = aarch64_sme_vq_immediate (Pmode, -+ UINTVAL (e.args[2]) * 16, -+ AARCH64_ISA_MODE); -+ e.add_input_operand (vnum_code, e.args[0]); -+ e.add_input_operand (vnum_code, e.args[2]); -+ e.add_input_operand (vnum_code, e.args[1]); -+ e.add_input_operand (vnum_code, mem_offset); -+ return e.generate_insn (vnum_code); -+ } -+ else -+ { -+ rtx base = e.get_contiguous_base (VNx16QImode, 1, 2, AARCH64_FL_SM_ON); -+ add_load_store_slice_operand (e, base_code, 0, 2); -+ e.add_input_operand (base_code, base); -+ return e.generate_insn (base_code); -+ } -+} -+ -+/* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec. -+ IS_LOAD is true if E is a load, false if it is a store. */ -+ -+static rtx -+expand_ld1_st1 (function_expander &e, int unspec, bool is_load) -+{ -+ bool is_vnum = has_in_range_vnum_arg (e, e.vector_mode (0), 4); -+ auto icode = (is_vnum -+ ? 
code_for_aarch64_sme_plus (unspec, e.vector_mode (0)) -+ : code_for_aarch64_sme (unspec, e.vector_mode (0))); -+ if (!is_load) -+ add_load_store_operand (e, 3); -+ e.add_input_operand (icode, e.args[0]); -+ if (is_vnum) -+ { -+ e.add_input_operand (icode, e.args[1]); -+ e.add_input_operand (icode, e.args[4]); -+ } -+ else -+ add_load_store_slice_operand (e, icode, 1, 4); -+ e.add_input_operand (icode, e.args[2]); -+ if (is_load) -+ add_load_store_operand (e, 3); -+ return e.generate_insn (icode); -+} -+ -+class arm_has_sme_impl : public function_base -+{ -+ gimple * -+ fold (gimple_folder &f) const override -+ { -+ if (TARGET_SME) -+ return f.fold_to_cstu (1); -+ return nullptr; -+ } -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ if (TARGET_SME) -+ return const1_rtx; -+ emit_insn (gen_aarch64_get_sme_state ()); -+ return expand_simple_binop (DImode, LSHIFTRT, -+ gen_rtx_REG (DImode, R0_REGNUM), -+ gen_int_mode (63, QImode), -+ e.possible_target, true, OPTAB_LIB_WIDEN); -+ } -+}; -+ -+class arm_in_streaming_mode_impl : public function_base -+{ -+ gimple * -+ fold (gimple_folder &f) const override -+ { -+ if (TARGET_STREAMING) -+ return f.fold_to_cstu (1); -+ if (TARGET_NON_STREAMING) -+ return f.fold_to_cstu (0); -+ return nullptr; -+ } -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ if (TARGET_STREAMING) -+ return const1_rtx; -+ -+ if (TARGET_NON_STREAMING) -+ return const0_rtx; -+ -+ rtx reg; -+ if (TARGET_SME) -+ { -+ reg = gen_reg_rtx (DImode); -+ emit_insn (gen_aarch64_read_svcr (reg)); -+ } -+ else -+ { -+ emit_insn (gen_aarch64_get_sme_state ()); -+ reg = gen_rtx_REG (DImode, R0_REGNUM); -+ } -+ return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode), -+ e.possible_target, true, OPTAB_LIB_WIDEN); -+ } -+}; -+ -+/* Implements svcnts[bhwd]. 
*/ -+class svcnts_bhwd_impl : public function_base -+{ -+public: -+ constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} -+ -+ unsigned int -+ get_shift () const -+ { -+ return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode)); -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const override -+ { -+ if (TARGET_STREAMING) -+ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode)); -+ return nullptr; -+ } -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE); -+ auto shift = get_shift (); -+ if (!shift) -+ return cntsb; -+ -+ return expand_simple_binop (DImode, LSHIFTRT, cntsb, -+ gen_int_mode (shift, QImode), -+ e.possible_target, true, OPTAB_LIB_WIDEN); -+ } -+ -+ /* The mode of the vector associated with the [bhwd] suffix. */ -+ machine_mode m_ref_mode; -+}; -+ -+class svld1_za_impl : public load_za_base -+{ -+public: -+ constexpr svld1_za_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ return expand_ld1_st1 (e, m_unspec, true); -+ } -+ -+ int m_unspec; -+}; -+ -+class svldr_za_impl : public load_za_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const override -+ { -+ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_ldr0, -+ code_for_aarch64_sme_ldrn (Pmode)); -+ } -+}; -+ -+using svread_za_tile_impl = add_call_properties; -+ -+class svst1_za_impl : public store_za_base -+{ -+public: -+ constexpr svst1_za_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const override -+ { -+ return expand_ld1_st1 (e, m_unspec, false); -+ } -+ -+ int m_unspec; -+}; -+ -+class svstr_za_impl : public store_za_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const override -+ { -+ return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_str0, -+ code_for_aarch64_sme_strn (Pmode)); -+ } -+}; -+ -+class svundef_za_impl : public write_za -+{ -+public: -+ rtx -+ expand (function_expander 
&) const override -+ { -+ rtx target = gen_rtx_REG (VNx16QImode, ZA_REGNUM); -+ emit_clobber (copy_rtx (target)); -+ return const0_rtx; -+ } -+}; -+ -+using svwrite_za_tile_impl = add_call_properties; -+ -+class svzero_mask_za_impl : public write_za -+{ -+public: -+ rtx -+ expand (function_expander &e) const override -+ { -+ return e.use_exact_insn (CODE_FOR_aarch64_sme_zero_za); -+ } -+}; -+ -+class svzero_za_impl : public write_za -+{ -+public: -+ rtx -+ expand (function_expander &) const override -+ { -+ emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode))); -+ return const0_rtx; -+ } -+}; -+ -+} /* end anonymous namespace */ -+ -+namespace aarch64_sve { -+ -+FUNCTION (arm_has_sme, arm_has_sme_impl, ) -+FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, ) -+FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA, -+ UNSPEC_SME_ADDHA, -1)) -+FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA, -+ UNSPEC_SME_ADDVA, -1)) -+FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode)) -+FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode)) -+FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode)) -+FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode)) -+FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR)) -+FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER)) -+FUNCTION (svldr_za, svldr_za_impl, ) -+FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA, -+ UNSPEC_SME_FMOPA)) -+FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS, -+ UNSPEC_SME_FMOPS)) -+FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR)) -+FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER)) -+FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR)) -+FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER)) -+FUNCTION (svstr_za, svstr_za_impl, ) -+FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1)) -+FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, 
-1, -1)) -+FUNCTION (svundef_za, svundef_za_impl, ) -+FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1)) -+FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1)) -+FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR)) -+FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER)) -+FUNCTION (svzero_mask_za, svzero_mask_za_impl, ) -+FUNCTION (svzero_za, svzero_za_impl, ) -+ -+} /* end namespace aarch64_sve */ -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def -new file mode 100644 -index 000000000..5bdcc93f4 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def -@@ -0,0 +1,76 @@ -+/* ACLE support for AArch64 SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#define REQUIRED_EXTENSIONS 0 -+DEF_SVE_FUNCTION (arm_has_sme, bool_inherent, none, none) -+DEF_SVE_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_SME -+DEF_SVE_FUNCTION (svcntsb, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntsd, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntsh, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntsw, count_inherent, none, none) -+DEF_SME_ZA_FUNCTION (svldr, ldr_za, za, none) -+DEF_SME_ZA_FUNCTION (svstr, str_za, za, none) -+DEF_SME_ZA_FUNCTION (svundef, inherent_za, za, none) -+DEF_SME_ZA_FUNCTION (svzero, inherent_za, za, none) -+DEF_SME_ZA_FUNCTION (svzero_mask, inherent_mask_za, za, none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SM_ON -+DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_s_integer, za_m) -+DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_s_integer, za_m) -+DEF_SME_ZA_FUNCTION (svld1_hor, load_za, all_za, none) -+DEF_SME_ZA_FUNCTION (svld1_ver, load_za, all_za, none) -+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_base, za_m) -+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, d_za, za_m) -+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_base, za_m) -+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, d_za, za_m) -+DEF_SME_ZA_FUNCTION (svread_hor, read_za_m, za_all_data, m) -+DEF_SME_ZA_FUNCTION (svread_ver, read_za_m, za_all_data, m) -+DEF_SME_ZA_FUNCTION (svst1_hor, store_za, all_za, none) -+DEF_SME_ZA_FUNCTION (svst1_ver, store_za, all_za, none) -+DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_base_signed, za_m) -+DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_base_signed, za_m) -+DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_base_unsigned, za_m) -+DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_base_unsigned, za_m) -+DEF_SME_ZA_FUNCTION (svwrite_hor, write_za_m, za_all_data, za_m) -+DEF_SME_ZA_FUNCTION (svwrite_ver, write_za_m, za_all_data, za_m) -+#undef 
REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ -+ | AARCH64_FL_SME_I16I64 \ -+ | AARCH64_FL_SM_ON) -+DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_d_integer, za_m) -+DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_d_integer, za_m) -+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_i16i64, za_m) -+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, mop_i16i64, za_m) -+DEF_SME_ZA_FUNCTION (svsumopa, binary_za_uint_m, mop_i16i64_signed, za_m) -+DEF_SME_ZA_FUNCTION (svsumops, binary_za_uint_m, mop_i16i64_signed, za_m) -+DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_i16i64_unsigned, za_m) -+DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \ -+ | AARCH64_FL_SME_F64F64 \ -+ | AARCH64_FL_SM_ON) -+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m) -+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m) -+#undef REQUIRED_EXTENSIONS -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h b/gcc/config/aarch64/aarch64-sve-builtins-sme.h -new file mode 100644 -index 000000000..acfed7700 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h -@@ -0,0 +1,57 @@ -+/* ACLE support for AArch64 SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#ifndef GCC_AARCH64_SVE_BUILTINS_SME_H -+#define GCC_AARCH64_SVE_BUILTINS_SME_H -+ -+namespace aarch64_sve -+{ -+ namespace functions -+ { -+ extern const function_base *const arm_has_sme; -+ extern const function_base *const arm_in_streaming_mode; -+ extern const function_base *const svaddha_za; -+ extern const function_base *const svaddva_za; -+ extern const function_base *const svcntsb; -+ extern const function_base *const svcntsd; -+ extern const function_base *const svcntsh; -+ extern const function_base *const svcntsw; -+ extern const function_base *const svld1_hor_za; -+ extern const function_base *const svld1_ver_za; -+ extern const function_base *const svldr_za; -+ extern const function_base *const svmopa_za; -+ extern const function_base *const svmops_za; -+ extern const function_base *const svread_hor_za; -+ extern const function_base *const svread_ver_za; -+ extern const function_base *const svst1_hor_za; -+ extern const function_base *const svst1_ver_za; -+ extern const function_base *const svstr_za; -+ extern const function_base *const svsumopa_za; -+ extern const function_base *const svsumops_za; -+ extern const function_base *const svusmopa_za; -+ extern const function_base *const svusmops_za; -+ extern const function_base *const svwrite_hor_za; -+ extern const function_base *const svwrite_ver_za; -+ extern const function_base *const svundef_za; -+ extern const function_base *const svzero_za; -+ extern const function_base *const svzero_mask_za; -+ } -+} -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index 3441b4294..32971a7c8 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -51,6 +51,7 @@ - #include "aarch64-sve-builtins.h" - #include "aarch64-sve-builtins-base.h" - #include "aarch64-sve-builtins-sve2.h" -+#include "aarch64-sve-builtins-sme.h" - #include "aarch64-sve-builtins-shapes.h" - - namespace aarch64_sve { 
-@@ -112,6 +113,7 @@ static const char *const pred_suffixes[NUM_PREDS + 1] = { - "_m", - "_x", - "_z", -+ "_m", - "" - }; - -@@ -136,12 +138,28 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { - TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ - TYPE_##CLASS == TYPE_unsigned, \ - TYPE_##CLASS == TYPE_float, \ -+ TYPE_##CLASS != TYPE_bool, \ - TYPE_##CLASS == TYPE_bool, \ -+ false, \ -+ 0, \ -+ MODE }, -+#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ -+ { "_" #NAME, \ -+ NUM_VECTOR_TYPES, \ -+ NUM_TYPE_CLASSES, \ -+ BITS, \ -+ BITS / BITS_PER_UNIT, \ -+ false, \ -+ false, \ -+ false, \ -+ false, \ -+ false, \ -+ true, \ - 0, \ - MODE }, - #include "aarch64-sve-builtins.def" - { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, -- 0, VOIDmode } -+ false, false, 0, VOIDmode } - }; - - CONSTEXPR const group_suffix_info group_suffixes[] = { -@@ -422,6 +440,79 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { - TYPES_while1 (D, b32), \ - TYPES_while1 (D, b64) - -+/* _za8 _za16 _za32 _za64 _za128. */ -+#define TYPES_all_za(S, D) \ -+ S (za8), S (za16), S (za32), S (za64), S (za128) -+ -+/* _za64. */ -+#define TYPES_d_za(S, D) \ -+ S (za64) -+ -+/* { _za8 } x { _s8 _u8 } -+ -+ { _za16 } x { _bf16 _f16 _s16 _u16 } -+ -+ { _za32 } x { _f32 _s32 _u32 } -+ -+ { _za64 } x { _f64 _s64 _u64 }. */ -+#define TYPES_za_bhsd_data(S, D) \ -+ D (za8, s8), D (za8, u8), \ -+ D (za16, bf16), D (za16, f16), D (za16, s16), D (za16, u16), \ -+ D (za32, f32), D (za32, s32), D (za32, u32), \ -+ D (za64, f64), D (za64, s64), D (za64, u64) -+ -+/* Likewise, plus: -+ -+ { _za128 } x { _bf16 } -+ { _f16 _f32 _f64 } -+ { _s8 _s16 _s32 _s64 } -+ { _u8 _u16 _u32 _u64 }. */ -+ -+#define TYPES_za_all_data(S, D) \ -+ TYPES_za_bhsd_data (S, D), \ -+ TYPES_reinterpret1 (D, za128) -+ -+/* _za32 x { _s32 _u32 }. */ -+#define TYPES_za_s_integer(S, D) \ -+ D (za32, s32), D (za32, u32) -+ -+ -+/* _za64_f64. 
*/ -+#define TYPES_za_d_float(S, D) \ -+ D (za64, f64) -+ -+/* _za64 x { _s64 _u64 }. */ -+#define TYPES_za_d_integer(S, D) \ -+ D (za64, s64), D (za64, u64) -+ -+/* _za32 x { _s8 _u8 _bf16 _f16 _f32 }. */ -+#define TYPES_mop_base(S, D) \ -+ D (za32, s8), D (za32, u8), D (za32, bf16), D (za32, f16), D (za32, f32) -+ -+/* _za32_s8. */ -+#define TYPES_mop_base_signed(S, D) \ -+ D (za32, s8) -+ -+/* _za32_u8. */ -+#define TYPES_mop_base_unsigned(S, D) \ -+ D (za32, u8) -+ -+/* _za64 x { _s16 _u16 }. */ -+#define TYPES_mop_i16i64(S, D) \ -+ D (za64, s16), D (za64, u16) -+ -+/* _za64_s16. */ -+#define TYPES_mop_i16i64_signed(S, D) \ -+ D (za64, s16) -+ -+/* _za64_u16. */ -+#define TYPES_mop_i16i64_unsigned(S, D) \ -+ D (za64, u16) -+ -+/* _za. */ -+#define TYPES_za(S, D) \ -+ S (za) -+ - /* Describe a pair of type suffixes in which only the first is used. */ - #define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } - -@@ -489,6 +580,19 @@ DEF_SVE_TYPES_ARRAY (cvt_narrow); - DEF_SVE_TYPES_ARRAY (inc_dec_n); - DEF_SVE_TYPES_ARRAY (reinterpret); - DEF_SVE_TYPES_ARRAY (while); -+DEF_SVE_TYPES_ARRAY (all_za); -+DEF_SVE_TYPES_ARRAY (d_za); -+DEF_SVE_TYPES_ARRAY (za_all_data); -+DEF_SVE_TYPES_ARRAY (za_s_integer); -+DEF_SVE_TYPES_ARRAY (za_d_float); -+DEF_SVE_TYPES_ARRAY (za_d_integer); -+DEF_SVE_TYPES_ARRAY (mop_base); -+DEF_SVE_TYPES_ARRAY (mop_base_signed); -+DEF_SVE_TYPES_ARRAY (mop_base_unsigned); -+DEF_SVE_TYPES_ARRAY (mop_i16i64); -+DEF_SVE_TYPES_ARRAY (mop_i16i64_signed); -+DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned); -+DEF_SVE_TYPES_ARRAY (za); - - static const group_suffix_index groups_none[] = { - GROUP_none, NUM_GROUP_SUFFIXES -@@ -505,6 +609,9 @@ static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; - explicit suffix. */ - static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS }; - -+/* Used by functions that only support "_m" predication. 
*/ -+static const predication_index preds_m[] = { PRED_m, NUM_PREDS }; -+ - /* Used by functions that allow merging and "don't care" predication, - but are not suitable for predicated MOVPRFX. */ - static const predication_index preds_mx[] = { -@@ -536,17 +643,23 @@ static const predication_index preds_z_or_none[] = { - /* Used by (mostly predicate) functions that only support "_z" predication. */ - static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; - -+/* Used by SME instructions that always merge into ZA. */ -+static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS }; -+ - /* A list of all SVE ACLE functions. */ - static CONSTEXPR const function_group_info function_groups[] = { - #define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ - { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \ - preds_##PREDS, REQUIRED_EXTENSIONS }, -+#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \ -+ { #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \ -+ groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) }, - #include "aarch64-sve-builtins.def" - }; - - /* The scalar type associated with each vector type. */ --extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES]; --tree scalar_types[NUM_VECTOR_TYPES]; -+extern GTY(()) tree scalar_types[NUM_VECTOR_TYPES + 1]; -+tree scalar_types[NUM_VECTOR_TYPES + 1]; - - /* The single-predicate and single-vector types, with their built-in - "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always -@@ -654,7 +767,7 @@ find_type_suffix_for_scalar_type (const_tree type) - /* A linear search should be OK here, since the code isn't hot and - the number of types is only small. 
*/ - for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) -- if (!type_suffixes[suffix_i].bool_p) -+ if (type_suffixes[suffix_i].vector_p) - { - vector_type_index vector_i = type_suffixes[suffix_i].vector_type; - if (matches_type_p (scalar_types[vector_i], type)) -@@ -745,6 +858,20 @@ check_required_extensions (location_t location, tree fndecl, - return false; - } - -+ if (missing_extensions & AARCH64_FL_SM_ON) -+ { -+ error_at (location, "ACLE function %qD can only be called when" -+ " SME streaming mode is enabled", fndecl); -+ return false; -+ } -+ -+ if (missing_extensions & AARCH64_FL_ZA_ON) -+ { -+ error_at (location, "ACLE function %qD can only be called from" -+ " a function that has %qs state", fndecl, "za"); -+ return false; -+ } -+ - static const struct { - aarch64_feature_flags flag; - const char *name; -@@ -780,9 +907,13 @@ report_out_of_range (location_t location, tree fndecl, unsigned int argno, - HOST_WIDE_INT actual, HOST_WIDE_INT min, - HOST_WIDE_INT max) - { -- error_at (location, "passing %wd to argument %d of %qE, which expects" -- " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, -- min, max); -+ if (min == max) -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " the value %wd", actual, argno + 1, fndecl, min); -+ else -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, -+ min, max); - } - - /* Report that LOCATION has a call to FNDECL in which argument ARGNO has -@@ -869,7 +1000,7 @@ function_instance::reads_global_state_p () const - return true; - - /* Handle direct reads of global state. 
*/ -- return flags & (CP_READ_MEMORY | CP_READ_FFR); -+ return flags & (CP_READ_MEMORY | CP_READ_FFR | CP_READ_ZA); - } - - /* Return true if calls to the function could modify some form of -@@ -890,7 +1021,7 @@ function_instance::modifies_global_state_p () const - return true; - - /* Handle direct modifications of global state. */ -- return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR); -+ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR | CP_WRITE_ZA); - } - - /* Return true if calls to the function could raise a signal. */ -@@ -922,8 +1053,8 @@ registered_function_hasher::equal (value_type value, const compare_type &key) - return value->instance == key; - } - --sve_switcher::sve_switcher () -- : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE) -+sve_switcher::sve_switcher (aarch64_feature_flags flags) -+ : aarch64_simd_switcher (AARCH64_FL_F16 | AARCH64_FL_SVE | flags) - { - /* Changing the ISA flags and have_regs_of_mode should be enough here. - We shouldn't need to pay the compile-time cost of a full target -@@ -979,6 +1110,10 @@ char * - function_builder::get_name (const function_instance &instance, - bool overloaded_p) - { -+ /* __arm_* functions are listed as arm_*, so that the associated GCC -+ code is not in the implementation namespace. */ -+ if (strncmp (instance.base_name, "arm_", 4) == 0) -+ append_name ("__"); - append_name (instance.base_name); - if (overloaded_p) - switch (instance.displacement_units ()) -@@ -1016,12 +1151,72 @@ add_attribute (const char *name, tree attrs) - return tree_cons (get_identifier (name), NULL_TREE, attrs); - } - --/* Return the appropriate function attributes for INSTANCE. */ -+/* Add attribute NS::NAME to ATTRS. */ -+static tree -+add_attribute (const char *ns, const char *name, tree value, tree attrs) -+{ -+ return tree_cons (build_tree_list (get_identifier (ns), -+ get_identifier (name)), -+ value, attrs); -+} -+ -+/* Attribute arm::NAME describes shared state that is an input if IS_IN -+ and an output if IS_OUT. 
Check whether a call with call properties -+ CALL_FLAGS needs such an attribute. Add it to in-progress attribute -+ list ATTRS if so. Return the new attribute list. */ -+static tree -+add_shared_state_attribute (const char *name, bool is_in, bool is_out, -+ unsigned int call_flags, tree attrs) -+{ -+ struct state_flag_info -+ { -+ const char *name; -+ unsigned int read_flag; -+ unsigned int write_flag; -+ }; -+ static state_flag_info state_flags[] = -+ { -+ { "za", CP_READ_ZA, CP_WRITE_ZA } -+ }; -+ -+ tree args = NULL_TREE; -+ for (const auto &state_flag : state_flags) -+ { -+ auto all_flags = state_flag.read_flag | state_flag.write_flag; -+ auto these_flags = ((is_in ? state_flag.read_flag : 0) -+ | (is_out ? state_flag.write_flag : 0)); -+ if ((call_flags & all_flags) == these_flags) -+ { -+ tree value = build_string (strlen (state_flag.name) + 1, -+ state_flag.name); -+ args = tree_cons (NULL_TREE, value, args); -+ } -+ } -+ if (args) -+ attrs = add_attribute ("arm", name, args, attrs); -+ return attrs; -+} -+ -+/* Return the appropriate function attributes for INSTANCE, which requires -+ the feature flags in REQUIRED_EXTENSIONS. 
*/ - tree --function_builder::get_attributes (const function_instance &instance) -+function_builder::get_attributes (const function_instance &instance, -+ aarch64_feature_flags required_extensions) - { - tree attrs = NULL_TREE; - -+ if (required_extensions & AARCH64_FL_SM_ON) -+ attrs = add_attribute ("arm", "streaming", NULL_TREE, attrs); -+ else if (!(required_extensions & AARCH64_FL_SM_OFF)) -+ attrs = add_attribute ("arm", "streaming_compatible", NULL_TREE, attrs); -+ -+ attrs = add_shared_state_attribute ("in", true, false, -+ instance.call_properties (), attrs); -+ attrs = add_shared_state_attribute ("out", false, true, -+ instance.call_properties (), attrs); -+ attrs = add_shared_state_attribute ("inout", true, true, -+ instance.call_properties (), attrs); -+ - if (!instance.modifies_global_state_p ()) - { - if (instance.reads_global_state_p ()) -@@ -1097,7 +1292,7 @@ add_unique_function (const function_instance &instance, - tree fntype = build_function_type_array (return_type, - argument_types.length (), - argument_types.address ()); -- tree attrs = get_attributes (instance); -+ tree attrs = get_attributes (instance, required_extensions); - registered_function &rfn = add_function (instance, name, fntype, attrs, - required_extensions, false, false); - -@@ -1114,7 +1309,7 @@ add_unique_function (const function_instance &instance, - if (strcmp (name, overload_name) != 0) - { - /* Attribute lists shouldn't be shared. 
*/ -- tree attrs = get_attributes (instance); -+ tree attrs = get_attributes (instance, required_extensions); - bool placeholder_p = !(m_direct_overloads || force_direct_overloads); - add_function (instance, overload_name, fntype, attrs, - required_extensions, false, placeholder_p); -@@ -2283,6 +2478,7 @@ bool - function_resolver::check_gp_argument (unsigned int nops, - unsigned int &i, unsigned int &nargs) - { -+ gcc_assert (pred != PRED_za_m); - i = 0; - if (pred != PRED_none) - { -@@ -2488,9 +2684,7 @@ function_checker::function_checker (location_t location, - unsigned int nargs, tree *args) - : function_call_info (location, instance, fndecl), - m_fntype (fntype), m_nargs (nargs), m_args (args), -- /* We don't have to worry about unary _m operations here, since they -- never have arguments that need checking. */ -- m_base_arg (pred != PRED_none ? 1 : 0) -+ m_base_arg (pred != PRED_none && pred != PRED_za_m ? 1 : 0) - { - } - -@@ -2889,21 +3083,51 @@ function_expander::convert_to_pmode (rtx x) - } - - /* Return the base address for a contiguous load or store function. -- MEM_MODE is the mode of the addressed memory. */ -+ MEM_MODE is the mode of the addressed memory, BASE_ARGNO is -+ the index of the base argument, and VNUM_ARGNO is the index of -+ the vnum offset argument (if any). VL_ISA_MODE is AARCH64_FL_SM_ON -+ if the vnum argument is a factor of the SME vector length, 0 if it -+ is a factor of the current prevailing vector length. */ - rtx --function_expander::get_contiguous_base (machine_mode mem_mode) -+function_expander::get_contiguous_base (machine_mode mem_mode, -+ unsigned int base_argno, -+ unsigned int vnum_argno, -+ aarch64_feature_flags vl_isa_mode) - { -- rtx base = convert_to_pmode (args[1]); -+ rtx base = convert_to_pmode (args[base_argno]); - if (mode_suffix_id == MODE_vnum) - { -- /* Use the size of the memory mode for extending loads and truncating -- stores. 
Use the size of a full vector for non-extending loads -- and non-truncating stores (including svld[234] and svst[234]). */ -- poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), -- BYTES_PER_SVE_VECTOR); -- rtx offset = gen_int_mode (size, Pmode); -- offset = simplify_gen_binary (MULT, Pmode, args[2], offset); -- base = simplify_gen_binary (PLUS, Pmode, base, offset); -+ rtx vnum = args[vnum_argno]; -+ if (vnum != const0_rtx) -+ { -+ /* Use the size of the memory mode for extending loads and truncating -+ stores. Use the size of a full vector for non-extending loads -+ and non-truncating stores (including svld[234] and svst[234]). */ -+ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), -+ BYTES_PER_SVE_VECTOR); -+ rtx offset; -+ if ((vl_isa_mode & AARCH64_FL_SM_ON) -+ && !TARGET_STREAMING -+ && !size.is_constant ()) -+ { -+ gcc_assert (known_eq (size, BYTES_PER_SVE_VECTOR)); -+ if (CONST_INT_P (vnum) && IN_RANGE (INTVAL (vnum), -32, 31)) -+ offset = aarch64_sme_vq_immediate (Pmode, INTVAL (vnum) * 16, -+ AARCH64_ISA_MODE); -+ else -+ { -+ offset = aarch64_sme_vq_immediate (Pmode, 16, -+ AARCH64_ISA_MODE); -+ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); -+ } -+ } -+ else -+ { -+ offset = gen_int_mode (size, Pmode); -+ offset = simplify_gen_binary (MULT, Pmode, vnum, offset); -+ } -+ base = simplify_gen_binary (PLUS, Pmode, base, offset); -+ } - } - return base; - } -@@ -2991,11 +3215,18 @@ function_expander::add_input_operand (insn_code icode, rtx x) - machine_mode mode = operand.mode; - if (mode == VOIDmode) - { -- /* The only allowable use of VOIDmode is the wildcard -- aarch64_any_register_operand, which is used to avoid -- combinatorial explosion in the reinterpret patterns. 
*/ -- gcc_assert (operand.predicate == aarch64_any_register_operand); -- mode = GET_MODE (x); -+ /* The only allowable uses of VOIDmode are: -+ -+ - the wildcard aarch64_any_register_operand, which is used -+ to avoid combinatorial explosion in the reinterpret patterns -+ -+ - pmode_register_operand, which always has mode Pmode. */ -+ if (operand.predicate == aarch64_any_register_operand) -+ mode = GET_MODE (x); -+ else if (operand.predicate == pmode_register_operand) -+ mode = Pmode; -+ else -+ gcc_unreachable (); - } - else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) - x = expand_vector_broadcast (mode, x); -@@ -3010,7 +3241,7 @@ function_expander::add_input_operand (insn_code icode, rtx x) - - /* Add an integer operand with value X to the instruction. */ - void --function_expander::add_integer_operand (HOST_WIDE_INT x) -+function_expander::add_integer_operand (poly_int64 x) - { - m_ops.safe_grow (m_ops.length () + 1, true); - create_integer_operand (&m_ops.last (), x); -@@ -3555,7 +3786,10 @@ init_builtins () - sve_switcher sve; - register_builtin_types (); - if (in_lto_p) -- handle_arm_sve_h (); -+ { -+ handle_arm_sve_h (); -+ handle_arm_sme_h (); -+ } - } - - /* Register vector type TYPE under its arm_sve.h name. */ -@@ -3705,7 +3939,8 @@ handle_arm_sve_h () - function_table = new hash_table (1023); - function_builder builder; - for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) -- builder.register_function_group (function_groups[i]); -+ if (!(function_groups[i].required_extensions & AARCH64_FL_SME)) -+ builder.register_function_group (function_groups[i]); - } - - /* Return the function decl with SVE function subcode CODE, or error_mark_node -@@ -3718,6 +3953,33 @@ builtin_decl (unsigned int code, bool) - return (*registered_functions)[code]->decl; - } - -+/* Implement #pragma GCC aarch64 "arm_sme.h". 
*/ -+void -+handle_arm_sme_h () -+{ -+ if (!function_table) -+ { -+ error ("%qs defined without first defining %qs", -+ "arm_sme.h", "arm_sve.h"); -+ return; -+ } -+ -+ static bool initialized_p; -+ if (initialized_p) -+ { -+ error ("duplicate definition of %qs", "arm_sme.h"); -+ return; -+ } -+ initialized_p = true; -+ -+ sme_switcher sme; -+ -+ function_builder builder; -+ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) -+ if (function_groups[i].required_extensions & AARCH64_FL_SME) -+ builder.register_function_group (function_groups[i]); -+} -+ - /* If we're implementing manual overloading, check whether the SVE - function with subcode CODE is overloaded, and if so attempt to - determine the corresponding non-overloaded function. The call -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def -index be10b5ea1..69c11b1d0 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.def -+++ b/gcc/config/aarch64/aarch64-sve-builtins.def -@@ -29,6 +29,10 @@ - #define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) - #endif - -+#ifndef DEF_SME_ZA_SUFFIX -+#define DEF_SME_ZA_SUFFIX(A, B, C) -+#endif -+ - #ifndef DEF_SVE_GROUP_SUFFIX - #define DEF_SVE_GROUP_SUFFIX(A, B, C) - #endif -@@ -42,6 +46,16 @@ - DEF_SVE_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) - #endif - -+#ifndef DEF_SME_ZA_FUNCTION_GS -+#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) \ -+ DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUP, PREDS) -+#endif -+ -+#ifndef DEF_SME_ZA_FUNCTION -+#define DEF_SME_ZA_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ -+ DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS) -+#endif -+ - DEF_SVE_MODE (n, none, none, none) - DEF_SVE_MODE (index, none, none, elements) - DEF_SVE_MODE (offset, none, none, bytes) -@@ -104,16 +118,30 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode) - DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) - DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, 
VNx2DImode) - -+/* Associate _za with bytes. This is needed for svldr_vnum_za and -+ svstr_vnum_za, whose ZA offset can be in the range [0, 15], as for za8. */ -+DEF_SME_ZA_SUFFIX (za, 8, VNx16QImode) -+ -+DEF_SME_ZA_SUFFIX (za8, 8, VNx16QImode) -+DEF_SME_ZA_SUFFIX (za16, 16, VNx8HImode) -+DEF_SME_ZA_SUFFIX (za32, 32, VNx4SImode) -+DEF_SME_ZA_SUFFIX (za64, 64, VNx2DImode) -+DEF_SME_ZA_SUFFIX (za128, 128, VNx1TImode) -+ - DEF_SVE_GROUP_SUFFIX (x2, 0, 2) - DEF_SVE_GROUP_SUFFIX (x3, 0, 3) - DEF_SVE_GROUP_SUFFIX (x4, 0, 4) - - #include "aarch64-sve-builtins-base.def" - #include "aarch64-sve-builtins-sve2.def" -+#include "aarch64-sve-builtins-sme.def" - -+#undef DEF_SME_ZA_FUNCTION - #undef DEF_SVE_FUNCTION -+#undef DEF_SME_ZA_FUNCTION_GS - #undef DEF_SVE_FUNCTION_GS - #undef DEF_SVE_GROUP_SUFFIX -+#undef DEF_SME_ZA_SUFFIX - #undef DEF_SVE_TYPE_SUFFIX - #undef DEF_SVE_TYPE - #undef DEF_SVE_MODE -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -index f16ac3947..6ef6bb93f 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.h -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -97,6 +97,8 @@ const unsigned int CP_PREFETCH_MEMORY = 1U << 3; - const unsigned int CP_WRITE_MEMORY = 1U << 4; - const unsigned int CP_READ_FFR = 1U << 5; - const unsigned int CP_WRITE_FFR = 1U << 6; -+const unsigned int CP_READ_ZA = 1U << 7; -+const unsigned int CP_WRITE_ZA = 1U << 8; - - /* Enumerates the SVE predicate and (data) vector types, together called - "vector types" for brevity. */ -@@ -142,6 +144,10 @@ enum predication_index - /* Zero predication: set inactive lanes of the vector result to zero. */ - PRED_z, - -+ /* Merging predication for SME's ZA: merge into slices of the array -+ instead of overwriting the whole slices. 
*/ -+ PRED_za_m, -+ - NUM_PREDS - }; - -@@ -176,6 +182,8 @@ enum type_suffix_index - { - #define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ - TYPE_SUFFIX_ ## NAME, -+#define DEF_SME_ZA_SUFFIX(NAME, BITS, MODE) \ -+ TYPE_SUFFIX_ ## NAME, - #include "aarch64-sve-builtins.def" - NUM_TYPE_SUFFIXES - }; -@@ -240,9 +248,13 @@ struct type_suffix_info - unsigned int unsigned_p : 1; - /* True if the suffix is for a floating-point type. */ - unsigned int float_p : 1; -+ /* True if the suffix is for a vector type (integer or float). */ -+ unsigned int vector_p : 1; - /* True if the suffix is for a boolean type. */ - unsigned int bool_p : 1; -- unsigned int spare : 12; -+ /* True if the suffix is for SME's ZA. */ -+ unsigned int za_p : 1; -+ unsigned int spare : 10; - - /* The associated vector or predicate mode. */ - machine_mode vector_mode : 16; -@@ -356,13 +368,15 @@ public: - tree displacement_vector_type () const; - units_index displacement_units () const; - -+ unsigned int num_za_tiles () const; -+ - const type_suffix_info &type_suffix (unsigned int) const; - const group_suffix_info &group_suffix () const; - - tree scalar_type (unsigned int) const; - tree vector_type (unsigned int) const; - tree tuple_type (unsigned int) const; -- unsigned int elements_per_vq (unsigned int i) const; -+ unsigned int elements_per_vq (unsigned int) const; - machine_mode vector_mode (unsigned int) const; - machine_mode tuple_mode (unsigned int) const; - machine_mode gp_mode (unsigned int) const; -@@ -401,7 +415,7 @@ private: - - char *get_name (const function_instance &, bool); - -- tree get_attributes (const function_instance &); -+ tree get_attributes (const function_instance &, aarch64_feature_flags); - - registered_function &add_function (const function_instance &, - const char *, tree, tree, -@@ -605,7 +619,8 @@ public: - bool overlaps_input_p (rtx); - - rtx convert_to_pmode (rtx); -- rtx get_contiguous_base (machine_mode); -+ rtx get_contiguous_base (machine_mode, 
unsigned int = 1, unsigned int = 2, -+ aarch64_feature_flags = 0); - rtx get_fallback_value (machine_mode, unsigned int, - unsigned int, unsigned int &); - rtx get_reg_target (); -@@ -613,7 +628,7 @@ public: - - void add_output_operand (insn_code); - void add_input_operand (insn_code, rtx); -- void add_integer_operand (HOST_WIDE_INT); -+ void add_integer_operand (poly_int64); - void add_mem_operand (machine_mode, rtx); - void add_address_operand (rtx); - void add_fixed_operand (rtx); -@@ -738,7 +753,7 @@ public: - class sve_switcher : public aarch64_simd_switcher - { - public: -- sve_switcher (); -+ sve_switcher (aarch64_feature_flags = 0); - ~sve_switcher (); - - private: -@@ -746,11 +761,18 @@ private: - bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; - }; - -+/* Extends sve_switch enough for defining arm_sme.h. */ -+class sme_switcher : public sve_switcher -+{ -+public: -+ sme_switcher () : sve_switcher (AARCH64_FL_SME) {} -+}; -+ - extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; - extern const mode_suffix_info mode_suffixes[MODE_none + 1]; - extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES]; - --extern tree scalar_types[NUM_VECTOR_TYPES]; -+extern tree scalar_types[NUM_VECTOR_TYPES + 1]; - extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; - extern tree acle_svpattern; - extern tree acle_svprfop; -@@ -883,6 +905,16 @@ function_instance::displacement_vector_type () const - return acle_vector_types[0][mode_suffix ().displacement_vector_type]; - } - -+/* Return the number of ZA tiles associated with the _za suffix -+ (which is always the first type suffix). */ -+inline unsigned int -+function_instance::num_za_tiles () const -+{ -+ auto &suffix = type_suffix (0); -+ gcc_checking_assert (suffix.za_p); -+ return suffix.element_bytes; -+} -+ - /* If the function takes a vector or scalar displacement, return the units - in which the displacement is measured, otherwise return UNITS_none. 
*/ - inline units_index -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 112dfeabb..113784e31 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -6160,15 +6160,26 @@ aarch64_output_sve_scalar_inc_dec (rtx offset) - } - - /* Return true if a single RDVL instruction can multiply FACTOR by the -- number of 128-bit quadwords in an SVE vector. */ -+ number of 128-bit quadwords in an SVE vector. This is also the -+ range of ADDVL. */ - - static bool --aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor) -+aarch64_sve_rdvl_addvl_factor_p (HOST_WIDE_INT factor) - { - return (multiple_p (factor, 16) - && IN_RANGE (factor, -32 * 16, 31 * 16)); - } - -+/* Return true if ADDPL can be used to add FACTOR multiplied by the number -+ of quadwords in an SVE vector. */ -+ -+static bool -+aarch64_sve_addpl_factor_p (HOST_WIDE_INT factor) -+{ -+ return (multiple_p (factor, 2) -+ && IN_RANGE (factor, -32 * 2, 31 * 2)); -+} -+ - /* Return true if we can move VALUE into a register using a single - RDVL instruction. */ - -@@ -6176,7 +6187,7 @@ static bool - aarch64_sve_rdvl_immediate_p (poly_int64 value) - { - HOST_WIDE_INT factor = value.coeffs[0]; -- return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor); -+ return value.coeffs[1] == factor && aarch64_sve_rdvl_addvl_factor_p (factor); - } - - /* Likewise for rtx X. */ -@@ -6212,10 +6223,8 @@ aarch64_sve_addvl_addpl_immediate_p (poly_int64 value) - HOST_WIDE_INT factor = value.coeffs[0]; - if (factor == 0 || value.coeffs[1] != factor) - return false; -- /* FACTOR counts VG / 2, so a value of 2 is one predicate width -- and a value of 16 is one vector width. */ -- return (((factor & 15) == 0 && IN_RANGE (factor, -32 * 16, 31 * 16)) -- || ((factor & 1) == 0 && IN_RANGE (factor, -32 * 2, 31 * 2))); -+ return (aarch64_sve_rdvl_addvl_factor_p (factor) -+ || aarch64_sve_addpl_factor_p (factor)); - } - - /* Likewise for rtx X. 
*/ -@@ -6315,11 +6324,11 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) - number of 128-bit quadwords in an SME vector. ISA_MODE is the - ISA mode in which the calculation is being performed. */ - --static rtx -+rtx - aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor, - aarch64_feature_flags isa_mode) - { -- gcc_assert (aarch64_sve_rdvl_factor_p (factor)); -+ gcc_assert (aarch64_sve_rdvl_addvl_factor_p (factor)); - if (isa_mode & AARCH64_FL_SM_ON) - /* We're in streaming mode, so we can use normal poly-int values. */ - return gen_int_mode ({ factor, factor }, mode); -@@ -6362,7 +6371,7 @@ aarch64_rdsvl_immediate_p (const_rtx x) - { - HOST_WIDE_INT factor; - return (aarch64_sme_vq_unspec_p (x, &factor) -- && aarch64_sve_rdvl_factor_p (factor)); -+ && aarch64_sve_rdvl_addvl_factor_p (factor)); - } - - /* Return the asm string for an RDSVL instruction that calculates X, -@@ -6379,6 +6388,38 @@ aarch64_output_rdsvl (const_rtx x) - return buffer; - } - -+/* Return true if X is a constant that can be added using ADDSVL or ADDSPL. */ -+ -+bool -+aarch64_addsvl_addspl_immediate_p (const_rtx x) -+{ -+ HOST_WIDE_INT factor; -+ return (aarch64_sme_vq_unspec_p (x, &factor) -+ && (aarch64_sve_rdvl_addvl_factor_p (factor) -+ || aarch64_sve_addpl_factor_p (factor))); -+} -+ -+/* X is a constant that satisfies aarch64_addsvl_addspl_immediate_p. -+ Return the asm string for the associated instruction. 
*/ -+ -+char * -+aarch64_output_addsvl_addspl (rtx x) -+{ -+ static char buffer[sizeof ("addspl\t%x0, %x1, #-") + 3 * sizeof (int)]; -+ HOST_WIDE_INT factor; -+ if (!aarch64_sme_vq_unspec_p (x, &factor)) -+ gcc_unreachable (); -+ if (aarch64_sve_rdvl_addvl_factor_p (factor)) -+ snprintf (buffer, sizeof (buffer), "addsvl\t%%x0, %%x1, #%d", -+ (int) factor / 16); -+ else if (aarch64_sve_addpl_factor_p (factor)) -+ snprintf (buffer, sizeof (buffer), "addspl\t%%x0, %%x1, #%d", -+ (int) factor / 2); -+ else -+ gcc_unreachable (); -+ return buffer; -+} -+ - /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */ - - static const unsigned HOST_WIDE_INT bitmask_imm_mul[] = -@@ -6965,7 +7006,7 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - { - /* Try to use an unshifted CNT[BHWD] or RDVL. */ - if (aarch64_sve_cnt_factor_p (factor) -- || aarch64_sve_rdvl_factor_p (factor)) -+ || aarch64_sve_rdvl_addvl_factor_p (factor)) - { - val = gen_int_mode (poly_int64 (factor, factor), mode); - shift = 0; -@@ -12185,7 +12226,7 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, - && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) - index = SUBREG_REG (index); - -- if (aarch64_sve_data_mode_p (mode)) -+ if (aarch64_sve_data_mode_p (mode) || mode == VNx1TImode) - { - if (type != ADDRESS_REG_REG - || (1 << shift) != GET_MODE_UNIT_SIZE (mode)) -@@ -12288,7 +12329,8 @@ aarch64_classify_address (struct aarch64_address_info *info, - && ((vec_flags == 0 - && known_lt (GET_MODE_SIZE (mode), 16)) - || vec_flags == VEC_ADVSIMD -- || vec_flags & VEC_SVE_DATA)); -+ || vec_flags & VEC_SVE_DATA -+ || mode == VNx1TImode)); - - /* For SVE, only accept [Rn], [Rn, #offset, MUL VL] and [Rn, Rm, LSL #shift]. 
- The latter is not valid for SVE predicates, and that's rejected through -@@ -12407,7 +12449,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - /* Make "m" use the LD1 offset range for SVE data modes, so - that pre-RTL optimizers like ivopts will work to that - instead of the wider LDR/STR range. */ -- if (vec_flags == VEC_SVE_DATA) -+ if (vec_flags == VEC_SVE_DATA || mode == VNx1TImode) - return (type == ADDR_QUERY_M - ? offset_4bit_signed_scaled_p (mode, offset) - : offset_9bit_signed_scaled_p (mode, offset)); -@@ -14750,6 +14792,51 @@ aarch64_output_casesi (rtx *operands) - return ""; - } - -+/* Return the asm string for an SME ZERO instruction whose 8-bit mask -+ operand is MASK. */ -+const char * -+aarch64_output_sme_zero_za (rtx mask) -+{ -+ auto mask_val = UINTVAL (mask); -+ if (mask_val == 0) -+ return "zero\t{}"; -+ -+ if (mask_val == 0xff) -+ return "zero\t{ za }"; -+ -+ static constexpr std::pair tiles[] = { -+ { 0xff, 'b' }, -+ { 0x55, 'h' }, -+ { 0x11, 's' }, -+ { 0x01, 'd' } -+ }; -+ /* The last entry in the list has the form "za7.d }", but that's the -+ same length as "za7.d, ". 
*/ -+ static char buffer[sizeof("zero\t{ ") + sizeof ("za7.d, ") * 8 + 1]; -+ unsigned int i = 0; -+ i += snprintf (buffer + i, sizeof (buffer) - i, "zero\t"); -+ const char *prefix = "{ "; -+ for (auto &tile : tiles) -+ { -+ auto tile_mask = tile.first; -+ unsigned int tile_index = 0; -+ while (tile_mask < 0x100) -+ { -+ if ((mask_val & tile_mask) == tile_mask) -+ { -+ i += snprintf (buffer + i, sizeof (buffer) - i, "%sza%d.%c", -+ prefix, tile_index, tile.second); -+ prefix = ", "; -+ mask_val &= ~tile_mask; -+ } -+ tile_mask <<= 1; -+ tile_index += 1; -+ } -+ } -+ gcc_assert (mask_val == 0 && i + 3 <= sizeof (buffer)); -+ snprintf (buffer + i, sizeof (buffer) - i, " }"); -+ return buffer; -+} - - /* Return size in bits of an arithmetic operand which is shifted/scaled and - masked such that it is suitable for a UXTB, UXTH, or UXTW extend -@@ -23756,6 +23843,31 @@ aarch64_sve_struct_memory_operand_p (rtx op) - && offset_4bit_signed_scaled_p (SVE_BYTE_MODE, last)); - } - -+/* Return true if OFFSET is a constant integer and if VNUM is -+ OFFSET * the number of bytes in an SVE vector. This is the requirement -+ that exists in SME LDR and STR instructions, where the VL offset must -+ equal the ZA slice offset. */ -+bool -+aarch64_sme_ldr_vnum_offset_p (rtx offset, rtx vnum) -+{ -+ if (!CONST_INT_P (offset) || !IN_RANGE (INTVAL (offset), 0, 15)) -+ return false; -+ -+ if (TARGET_STREAMING) -+ { -+ poly_int64 const_vnum; -+ return (poly_int_rtx_p (vnum, &const_vnum) -+ && known_eq (const_vnum, -+ INTVAL (offset) * BYTES_PER_SVE_VECTOR)); -+ } -+ else -+ { -+ HOST_WIDE_INT factor; -+ return (aarch64_sme_vq_unspec_p (vnum, &factor) -+ && factor == INTVAL (offset) * 16); -+ } -+} -+ - /* Emit a register copy from operand to operand, taking care not to - early-clobber source registers in the process. 
- -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 8b21faf34..50fdf2f50 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - /* Macros to test ISA flags. */ - - #define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF) -+#define AARCH64_ISA_SM_ON (aarch64_isa_flags & AARCH64_FL_SM_ON) - #define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON) - #define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES) - #define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC) -@@ -224,6 +225,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - #define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3) - #define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4) - #define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME) -+#define AARCH64_ISA_SME_I16I64 (aarch64_isa_flags & AARCH64_FL_SME_I16I64) -+#define AARCH64_ISA_SME_F64F64 (aarch64_isa_flags & AARCH64_FL_SME_F64F64) - #define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A) - #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) - #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) -@@ -256,6 +259,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - /* The current function is a normal non-streaming function. */ - #define TARGET_NON_STREAMING (AARCH64_ISA_SM_OFF) - -+/* The current function has a streaming body. */ -+#define TARGET_STREAMING (AARCH64_ISA_SM_ON) -+ - /* The current function has a streaming-compatible body. */ - #define TARGET_STREAMING_COMPATIBLE \ - ((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0) -@@ -316,6 +322,15 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF; - imply anything about the state of PSTATE.SM. */ - #define TARGET_SME (AARCH64_ISA_SME) - -+/* Streaming-mode SME instructions. 
*/ -+#define TARGET_STREAMING_SME (TARGET_STREAMING && TARGET_SME) -+ -+/* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64. */ -+#define TARGET_SME_I16I64 (AARCH64_ISA_SME_I16I64) -+ -+/* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64. */ -+#define TARGET_SME_F64F64 (AARCH64_ISA_SME_F64F64) -+ - /* ARMv8.3-A features. */ - #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A) - -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 05a7c6675..6b4341866 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -2147,10 +2147,10 @@ - - (define_insn "*add3_aarch64" - [(set -- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") -+ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk,rk") - (plus:GPI -- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") -- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] -+ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk,rk") -+ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav,UaV")))] - "" - "@ - add\\t%0, %1, %2 -@@ -2159,10 +2159,11 @@ - sub\\t%0, %1, #%n2 - # - * return aarch64_output_sve_scalar_inc_dec (operands[2]); -- * return aarch64_output_sve_addvl_addpl (operands[2]);" -+ * return aarch64_output_sve_addvl_addpl (operands[2]); -+ * return aarch64_output_addsvl_addspl (operands[2]);" - ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. 
-- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") -- (set_attr "arch" "*,*,simd,*,*,sve,sve")] -+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm,alu_imm") -+ (set_attr "arch" "*,*,simd,*,*,sve,sve,sme")] - ) - - ;; zero_extend version of above -diff --git a/gcc/config/aarch64/arm_sme.h b/gcc/config/aarch64/arm_sme.h -new file mode 100644 -index 000000000..5ddd49f57 ---- /dev/null -+++ b/gcc/config/aarch64/arm_sme.h -@@ -0,0 +1,45 @@ -+/* AArch64 SME intrinsics include file. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . 
*/ -+ -+#ifndef _ARM_SME_H_ -+#define _ARM_SME_H_ -+ -+#include -+#pragma GCC aarch64 "arm_sme.h" -+ -+void __arm_za_disable(void) __arm_streaming_compatible; -+ -+void *__arm_sc_memcpy(void *, const void *, __SIZE_TYPE__) -+ __arm_streaming_compatible; -+ -+void *__arm_sc_memmove(void *, const void *, __SIZE_TYPE__) -+ __arm_streaming_compatible; -+ -+void *__arm_sc_memset(void *, int, __SIZE_TYPE__) -+ __arm_streaming_compatible; -+ -+void *__arm_sc_memchr(void *, int, __SIZE_TYPE__) -+ __arm_streaming_compatible; -+ -+#endif -diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md -index 88fb9a07c..2da423779 100644 ---- a/gcc/config/aarch64/constraints.md -+++ b/gcc/config/aarch64/constraints.md -@@ -21,6 +21,9 @@ - (define_register_constraint "k" "STACK_REG" - "@internal The stack register.") - -+(define_register_constraint "Ucj" "W12_W15_REGS" -+ "@internal r12-r15, which can be used to index ZA.") -+ - (define_register_constraint "Ucs" "TAILCALL_ADDR_REGS" - "@internal Registers suitable for an indirect tail call") - -@@ -74,6 +77,12 @@ - a single ADDVL or ADDPL." - (match_operand 0 "aarch64_sve_addvl_addpl_immediate")) - -+(define_constraint "UaV" -+ "@internal -+ A constraint that matches a VG-based constant that can be added by -+ a single ADDSVL or ADDSPL." -+ (match_operand 0 "aarch64_addsvl_addspl_immediate")) -+ - (define_constraint "Uat" - "@internal - A constraint that matches a VG-based constant that can be added by -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index b616f5c9a..152d28f6b 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -450,6 +450,7 @@ - (define_mode_iterator VNx4SF_ONLY [VNx4SF]) - (define_mode_iterator VNx2DI_ONLY [VNx2DI]) - (define_mode_iterator VNx2DF_ONLY [VNx2DF]) -+(define_mode_iterator VNx1TI_ONLY [VNx1TI]) - - ;; All fully-packed SVE vector modes. 
- (define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI -@@ -608,6 +609,17 @@ - ;; Bfloat16 modes to which V4SF can be converted - (define_mode_iterator V4SF_TO_BF [V4BF V8BF]) - -+;; The modes used to represent different ZA access sizes. -+(define_mode_iterator SME_ZA_I [VNx16QI VNx8HI VNx4SI VNx2DI VNx1TI]) -+(define_mode_iterator SME_ZA_SDI [VNx4SI (VNx2DI "TARGET_SME_I16I64")]) -+ -+(define_mode_iterator SME_ZA_SDF_I [VNx4SI (VNx2DI "TARGET_SME_F64F64")]) -+ -+;; The modes for which outer product instructions are supported. -+(define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")]) -+(define_mode_iterator SME_MOP_HSDF [VNx8BF VNx8HF VNx4SF -+ (VNx2DF "TARGET_SME_F64F64")]) -+ - ;; ------------------------------------------------------------------ - ;; Unspec enumerations for Advance SIMD. These could well go into - ;; aarch64.md but for their use in int_iterators here. -@@ -986,6 +998,28 @@ - UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. - UNSPEC_BFCVT ; Used in aarch64-simd.md. - UNSPEC_FCVTXN ; Used in aarch64-simd.md. -+ -+ ;; All used in aarch64-sme.md -+ UNSPEC_SME_ADDHA -+ UNSPEC_SME_ADDVA -+ UNSPEC_SME_FMOPA -+ UNSPEC_SME_FMOPS -+ UNSPEC_SME_LD1_HOR -+ UNSPEC_SME_LD1_VER -+ UNSPEC_SME_READ_HOR -+ UNSPEC_SME_READ_VER -+ UNSPEC_SME_SMOPA -+ UNSPEC_SME_SMOPS -+ UNSPEC_SME_ST1_HOR -+ UNSPEC_SME_ST1_VER -+ UNSPEC_SME_SUMOPA -+ UNSPEC_SME_SUMOPS -+ UNSPEC_SME_UMOPA -+ UNSPEC_SME_UMOPS -+ UNSPEC_SME_USMOPA -+ UNSPEC_SME_USMOPS -+ UNSPEC_SME_WRITE_HOR -+ UNSPEC_SME_WRITE_VER - ]) - - ;; ------------------------------------------------------------------ -@@ -1115,9 +1149,15 @@ - ;; element. 
- (define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16") - (VNx4BI "32") (VNx2BI "64") -- (VNx16QI "8") (VNx8HI "16") -- (VNx4SI "32") (VNx2DI "64") -- (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")]) -+ (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") -+ (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") -+ (VNx8HF "16") (VNx16HF "16") (VNx32HF "16") -+ (VNx8BF "16") (VNx16BF "16") (VNx32BF "16") -+ (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") -+ (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") -+ (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") -+ (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") -+ (VNx1TI "128")]) - - ;; The number of bits in a vector container. - (define_mode_attr container_bits [(VNx16QI "8") -@@ -1243,6 +1283,7 @@ - (VNx4SF "s") (VNx2SF "s") - (VNx2DI "d") - (VNx2DF "d") -+ (VNx1TI "q") - (BF "h") (V4BF "h") (V8BF "h") - (HF "h") - (SF "s") (DF "d") -@@ -1261,6 +1302,7 @@ - (VNx4SF "w") (VNx2SF "w") - (VNx2DI "d") - (VNx2DF "d") -+ (VNx1TI "q") - (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") - (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") - (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") -@@ -2052,6 +2094,7 @@ - (VNx4SF "VNx4BI") (VNx2SF "VNx2BI") - (VNx2DI "VNx2BI") - (VNx2DF "VNx2BI") -+ (VNx1TI "VNx2BI") - (VNx32QI "VNx16BI") - (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") - (VNx16BF "VNx8BI") -@@ -2132,6 +2175,8 @@ - ;; The constraint to use for an SVE FCMLA lane index. 
- (define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")]) - -+(define_mode_attr b [(VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")]) -+ - ;; ------------------------------------------------------------------- - ;; Code Iterators - ;; ------------------------------------------------------------------- -@@ -3159,6 +3204,20 @@ - (define_int_iterator FCMUL_OP [UNSPEC_FCMUL - UNSPEC_FCMUL_CONJ]) - -+(define_int_iterator SME_LD1 [UNSPEC_SME_LD1_HOR UNSPEC_SME_LD1_VER]) -+(define_int_iterator SME_READ [UNSPEC_SME_READ_HOR UNSPEC_SME_READ_VER]) -+(define_int_iterator SME_ST1 [UNSPEC_SME_ST1_HOR UNSPEC_SME_ST1_VER]) -+(define_int_iterator SME_WRITE [UNSPEC_SME_WRITE_HOR UNSPEC_SME_WRITE_VER]) -+ -+(define_int_iterator SME_BINARY_SDI [UNSPEC_SME_ADDHA UNSPEC_SME_ADDVA]) -+ -+(define_int_iterator SME_INT_MOP [UNSPEC_SME_SMOPA UNSPEC_SME_SMOPS -+ UNSPEC_SME_SUMOPA UNSPEC_SME_SUMOPS -+ UNSPEC_SME_UMOPA UNSPEC_SME_UMOPS -+ UNSPEC_SME_USMOPA UNSPEC_SME_USMOPS]) -+ -+(define_int_iterator SME_FP_MOP [UNSPEC_SME_FMOPA UNSPEC_SME_FMOPS]) -+ - ;; Iterators for atomic operations. 
- - (define_int_iterator ATOMIC_LDOP -@@ -3231,6 +3290,26 @@ - (UNSPEC_PMULLT "pmullt") - (UNSPEC_PMULLT_PAIR "pmullt_pair") - (UNSPEC_SMATMUL "smatmul") -+ (UNSPEC_SME_ADDHA "addha") -+ (UNSPEC_SME_ADDVA "addva") -+ (UNSPEC_SME_FMOPA "fmopa") -+ (UNSPEC_SME_FMOPS "fmops") -+ (UNSPEC_SME_LD1_HOR "ld1_hor") -+ (UNSPEC_SME_LD1_VER "ld1_ver") -+ (UNSPEC_SME_READ_HOR "read_hor") -+ (UNSPEC_SME_READ_VER "read_ver") -+ (UNSPEC_SME_SMOPA "smopa") -+ (UNSPEC_SME_SMOPS "smops") -+ (UNSPEC_SME_ST1_HOR "st1_hor") -+ (UNSPEC_SME_ST1_VER "st1_ver") -+ (UNSPEC_SME_SUMOPA "sumopa") -+ (UNSPEC_SME_SUMOPS "sumops") -+ (UNSPEC_SME_UMOPA "umopa") -+ (UNSPEC_SME_UMOPS "umops") -+ (UNSPEC_SME_USMOPA "usmopa") -+ (UNSPEC_SME_USMOPS "usmops") -+ (UNSPEC_SME_WRITE_HOR "write_hor") -+ (UNSPEC_SME_WRITE_VER "write_ver") - (UNSPEC_SQCADD90 "sqcadd90") - (UNSPEC_SQCADD270 "sqcadd270") - (UNSPEC_SQRDCMLAH "sqrdcmlah") -@@ -4000,6 +4079,15 @@ - (define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW") - (UNSPEC_WHILEWR "UNSPEC_WHILEWR")]) - -+(define_int_attr hv [(UNSPEC_SME_LD1_HOR "h") -+ (UNSPEC_SME_LD1_VER "v") -+ (UNSPEC_SME_READ_HOR "h") -+ (UNSPEC_SME_READ_VER "v") -+ (UNSPEC_SME_ST1_HOR "h") -+ (UNSPEC_SME_ST1_VER "v") -+ (UNSPEC_SME_WRITE_HOR "h") -+ (UNSPEC_SME_WRITE_VER "v")]) -+ - ;; Iterators and attributes for fpcr fpsr getter setters - - (define_int_iterator GET_FPSCR -diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md -index 1b8496c07..3ec9e9103 100644 ---- a/gcc/config/aarch64/predicates.md -+++ b/gcc/config/aarch64/predicates.md -@@ -212,11 +212,17 @@ - (and (match_code "const_poly_int") - (match_test "aarch64_add_offset_temporaries (op) == 1"))) - -+(define_predicate "aarch64_addsvl_addspl_immediate" -+ (and (match_code "const") -+ (match_test "aarch64_addsvl_addspl_immediate_p (op)"))) -+ - (define_predicate "aarch64_pluslong_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_pluslong_immediate") - (and (match_test 
"TARGET_SVE") -- (match_operand 0 "aarch64_sve_plus_immediate")))) -+ (match_operand 0 "aarch64_sve_plus_immediate")) -+ (and (match_test "TARGET_SME") -+ (match_operand 0 "aarch64_addsvl_addspl_immediate")))) - - (define_predicate "aarch64_pluslong_or_poly_operand" - (ior (match_operand 0 "aarch64_pluslong_operand") -diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 -index 49731ba92..be60cc003 100644 ---- a/gcc/config/aarch64/t-aarch64 -+++ b/gcc/config/aarch64/t-aarch64 -@@ -63,6 +63,7 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ - $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ - $(srcdir)/config/aarch64/aarch64-sve-builtins-base.def \ - $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.def \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.def \ - $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ - $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) \ - $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ -@@ -72,7 +73,8 @@ aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ - $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ - $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ - $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ -- $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-sve-builtins.cc - -@@ -113,6 +115,19 @@ aarch64-sve-builtins-sve2.o: \ - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-sve-builtins-sve2.cc - -+aarch64-sve-builtins-sme.o: \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc \ -+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ -+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ -+ 
$(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ -+ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-sme.cc -+ - aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ - $(srcdir)/config/aarch64/iterators.md - $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 2420b05d9..47fff9c90 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -19480,6 +19480,10 @@ Enable the Flag Manipulation instructions Extension. - Enable the Pointer Authentication Extension. - @item sme - Enable the Scalable Matrix Extension. -+@item sme-i16i64 -+Enable the FEAT_SME_I16I64 extension to SME. -+@item sme-f64f64 -+Enable the FEAT_SME_F64F64 extension to SME. - - @end table - -diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp -new file mode 100644 -index 000000000..a9ed3a195 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme-acle-asm.exp -@@ -0,0 +1,82 @@ -+# Assembly-based regression-test driver for the SME ACLE. -+# Copyright (C) 2009-2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib g++-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SME if we're not testing it already. -+if { [check_effective_target_aarch64_sme] } { -+ set sme_flags "" -+} else { -+ set sme_flags "-march=armv9-a+sme" -+} -+ -+# Turn off any codegen tweaks by default that may affect expected assembly. -+# Tests relying on those should turn them on explicitly. -+set sme_flags "$sme_flags -mtune=generic -moverride=tune=none" -+ -+global gcc_runtest_parallelize_limit_minor -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set old_limit_minor $gcc_runtest_parallelize_limit_minor -+ set gcc_runtest_parallelize_limit_minor 1 -+} -+ -+torture-init -+set-torture-options { -+ "-std=c++11 -O0 -g" -+ "-std=c++14 -O1 -g" -+ "-std=c++17 -Og -g" -+ "-std=c++23 -Os -g" -+ "-std=gnu++11 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" -+ "-std=gnu++23 -Ofast -g" -+} { -+ "-DTEST_FULL" -+ "-DTEST_OVERLOADS" -+} -+ -+# Main loop. -+set gcc_subdir [string replace $subdir 0 2 gcc] -+set files [glob -nocomplain $srcdir/$gcc_subdir/acle-asm/*.c] -+set save-dg-do-what-default ${dg-do-what-default} -+if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } { -+ set dg-do-what-default assemble -+} else { -+ set dg-do-what-default compile -+} -+gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf" -+set dg-do-what-default ${save-dg-do-what-default} -+ -+torture-finish -+ -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set gcc_runtest_parallelize_limit_minor $old_limit_minor -+} -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c -index 9591e3d01..f2f922d4f 100644 ---- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c -@@ -4,6 +4,7 @@ - to be diagnosed. Any attempt to call the function before including - arm_sve.h will lead to a link failure. (Same for taking its address, - etc.) */ --extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); -+extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t) -+ __arm_streaming_compatible; - - #pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c -index f87201984..f24ef002c 100644 ---- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c -@@ -2,6 +2,7 @@ - - __SVUint8_t - svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) -+ __arm_streaming_compatible - { - return x; - } -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c -index 1f2e4bf66..6752ea11e 100644 ---- a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c -@@ -2,6 +2,7 @@ - - __SVUint8_t - svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) -+ __arm_streaming_compatible - { - return x; - } -diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c -index 0e6461fa4..23ebe5e4f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c -@@ -45,3 +45,41 
@@ - #ifdef __ARM_FEATURE_SVE2_SHA3 - #error Foo - #endif -+ -+#pragma GCC target "+sme" -+#ifndef __ARM_FEATURE_SME -+#error Foo -+#endif -+ -+#pragma GCC target "+sme+nofp" -+#ifdef __ARM_FEATURE_SME -+#error Foo -+#endif -+ -+#pragma GCC target "+sme+nosimd" -+#ifdef __ARM_FEATURE_SME -+#error Foo -+#endif -+ -+#pragma GCC target "+sme+nobf16" -+#ifdef __ARM_FEATURE_SME -+#error Foo -+#endif -+ -+#pragma GCC target "+nothing+sme" -+#ifdef __ARM_FEATURE_SME_I16I64 -+#error Foo -+#endif -+#ifdef __ARM_FEATURE_SME_F64F64 -+#error Foo -+#endif -+ -+#pragma GCC target "+sme-i16i64" -+#ifndef __ARM_FEATURE_SME_I16I64 -+#error Foo -+#endif -+ -+#pragma GCC target "+sme-f64f64" -+#ifndef __ARM_FEATURE_SME_F64F64 -+#error Foo -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp -new file mode 100644 -index 000000000..e2d002f26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp -@@ -0,0 +1,81 @@ -+# Assembly-based regression-test driver for the SME ACLE. -+# Copyright (C) 2009-2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. 
-+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SME if we're not testing it already. -+if { [check_effective_target_aarch64_sme] } { -+ set sme_flags "" -+} else { -+ set sme_flags "-march=armv9-a+sme" -+} -+ -+# Turn off any codegen tweaks by default that may affect expected assembly. -+# Tests relying on those should turn them on explicitly. -+set sme_flags "$sme_flags -mtune=generic -moverride=tune=none" -+ -+global gcc_runtest_parallelize_limit_minor -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set old_limit_minor $gcc_runtest_parallelize_limit_minor -+ set gcc_runtest_parallelize_limit_minor 1 -+} -+ -+torture-init -+set-torture-options { -+ "-std=c90 -O0 -g" -+ "-std=c99 -Og -g" -+ "-std=c11 -Os -g" -+ "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" -+ "-std=gnu90 -O3 -g" -+ "-std=gnu23 -Ofast -g" -+} { -+ "-DTEST_FULL" -+ "-DTEST_OVERLOADS" -+} -+ -+# Main loop. -+set files [glob -nocomplain $srcdir/$subdir/acle-asm/*.c] -+set save-dg-do-what-default ${dg-do-what-default} -+if { [check_effective_target_aarch64_asm_sme-i16i64_ok] } { -+ set dg-do-what-default assemble -+} else { -+ set dg-do-what-default compile -+} -+gcc-dg-runtest [lsort $files] "" "$sme_flags -fno-ipa-icf" -+set dg-do-what-default ${save-dg-do-what-default} -+ -+torture-finish -+ -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set gcc_runtest_parallelize_limit_minor $old_limit_minor -+} -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c -new file mode 100644 -index 000000000..8dee40145 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za32.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** addha_za32_s32_0_p0_p1_z0: -+** addha za0\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za32_s32_0_p0_p1_z0, svint32_t, -+ svaddha_za32_s32_m (0, p0, p1, z0), -+ svaddha_za32_m (0, p0, p1, z0)) -+ -+/* -+** addha_za32_s32_0_p1_p0_z1: -+** addha za0\.s, p1/m, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za32_s32_0_p1_p0_z1, svint32_t, -+ svaddha_za32_s32_m (0, p1, p0, z1), -+ svaddha_za32_m (0, p1, p0, z1)) -+ -+/* -+** addha_za32_s32_1_p0_p1_z0: -+** addha za1\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za32_s32_1_p0_p1_z0, svint32_t, -+ svaddha_za32_s32_m (1, p0, p1, z0), -+ svaddha_za32_m (1, p0, p1, z0)) -+ -+/* -+** addha_za32_s32_3_p0_p1_z0: -+** addha za3\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za32_s32_3_p0_p1_z0, svint32_t, -+ svaddha_za32_s32_m (3, p0, p1, z0), -+ svaddha_za32_m (3, p0, p1, z0)) -+ -+/* -+** addha_za32_u32_0_p0_p1_z0: -+** addha za0\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za32_u32_0_p0_p1_z0, svuint32_t, -+ svaddha_za32_u32_m (0, p0, p1, z0), -+ svaddha_za32_m (0, p0, p1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c -new file mode 100644 -index 000000000..363ff1aab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addha_za64.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** addha_za64_s64_0_p0_p1_z0: -+** addha za0\.d, p0/m, p1/m, 
z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za64_s64_0_p0_p1_z0, svint64_t, -+ svaddha_za64_s64_m (0, p0, p1, z0), -+ svaddha_za64_m (0, p0, p1, z0)) -+ -+/* -+** addha_za64_s64_0_p1_p0_z1: -+** addha za0\.d, p1/m, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za64_s64_0_p1_p0_z1, svint64_t, -+ svaddha_za64_s64_m (0, p1, p0, z1), -+ svaddha_za64_m (0, p1, p0, z1)) -+ -+/* -+** addha_za64_s64_1_p0_p1_z0: -+** addha za1\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za64_s64_1_p0_p1_z0, svint64_t, -+ svaddha_za64_s64_m (1, p0, p1, z0), -+ svaddha_za64_m (1, p0, p1, z0)) -+ -+/* -+** addha_za64_s64_7_p0_p1_z0: -+** addha za7\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za64_s64_7_p0_p1_z0, svint64_t, -+ svaddha_za64_s64_m (7, p0, p1, z0), -+ svaddha_za64_m (7, p0, p1, z0)) -+ -+/* -+** addha_za64_u64_0_p0_p1_z0: -+** addha za0\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addha_za64_u64_0_p0_p1_z0, svuint64_t, -+ svaddha_za64_u64_m (0, p0, p1, z0), -+ svaddha_za64_m (0, p0, p1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c -new file mode 100644 -index 000000000..0de019ac8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za32.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** addva_za32_s32_0_p0_p1_z0: -+** addva za0\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za32_s32_0_p0_p1_z0, svint32_t, -+ svaddva_za32_s32_m (0, p0, p1, z0), -+ svaddva_za32_m (0, p0, p1, z0)) -+ -+/* -+** addva_za32_s32_0_p1_p0_z1: -+** addva za0\.s, p1/m, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za32_s32_0_p1_p0_z1, svint32_t, -+ svaddva_za32_s32_m (0, p1, p0, z1), -+ svaddva_za32_m (0, p1, p0, z1)) -+ -+/* -+** addva_za32_s32_1_p0_p1_z0: -+** addva za1\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA 
(addva_za32_s32_1_p0_p1_z0, svint32_t, -+ svaddva_za32_s32_m (1, p0, p1, z0), -+ svaddva_za32_m (1, p0, p1, z0)) -+ -+/* -+** addva_za32_s32_3_p0_p1_z0: -+** addva za3\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za32_s32_3_p0_p1_z0, svint32_t, -+ svaddva_za32_s32_m (3, p0, p1, z0), -+ svaddva_za32_m (3, p0, p1, z0)) -+ -+/* -+** addva_za32_u32_0_p0_p1_z0: -+** addva za0\.s, p0/m, p1/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za32_u32_0_p0_p1_z0, svuint32_t, -+ svaddva_za32_u32_m (0, p0, p1, z0), -+ svaddva_za32_m (0, p0, p1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c -new file mode 100644 -index 000000000..d83d4e03c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/addva_za64.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** addva_za64_s64_0_p0_p1_z0: -+** addva za0\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za64_s64_0_p0_p1_z0, svint64_t, -+ svaddva_za64_s64_m (0, p0, p1, z0), -+ svaddva_za64_m (0, p0, p1, z0)) -+ -+/* -+** addva_za64_s64_0_p1_p0_z1: -+** addva za0\.d, p1/m, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za64_s64_0_p1_p0_z1, svint64_t, -+ svaddva_za64_s64_m (0, p1, p0, z1), -+ svaddva_za64_m (0, p1, p0, z1)) -+ -+/* -+** addva_za64_s64_1_p0_p1_z0: -+** addva za1\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za64_s64_1_p0_p1_z0, svint64_t, -+ svaddva_za64_s64_m (1, p0, p1, z0), -+ svaddva_za64_m (1, p0, p1, z0)) -+ -+/* -+** addva_za64_s64_7_p0_p1_z0: -+** addva za7\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (addva_za64_s64_7_p0_p1_z0, svint64_t, -+ svaddva_za64_s64_m (7, p0, p1, z0), -+ svaddva_za64_m (7, p0, p1, z0)) -+ -+/* -+** addva_za64_u64_0_p0_p1_z0: -+** addva za0\.d, p0/m, p1/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZA 
(addva_za64_u64_0_p0_p1_z0, svuint64_t, -+ svaddva_za64_u64_m (0, p0, p1, z0), -+ svaddva_za64_m (0, p0, p1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c -new file mode 100644 -index 000000000..e37793f9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_has_sme_sc.c -@@ -0,0 +1,25 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+nosme" -+ -+/* -+** test_nosme: -+** ... -+** bl __arm_sme_state -+** lsr x0, x0, #?63 -+** ... -+*/ -+PROTO (test_nosme, int, ()) { return __arm_has_sme (); } -+ -+#pragma GCC target "+sme" -+ -+/* -+** test_sme: -+** mov w0, #?1 -+** ret -+*/ -+PROTO (test_sme, int, ()) { return __arm_has_sme (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c -new file mode 100644 -index 000000000..ba475d67b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_ns.c -@@ -0,0 +1,11 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define NON_STREAMING -+#include "test_sme_acle.h" -+ -+/* -+** test_sme: -+** mov w0, #?0 -+** ret -+*/ -+PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c -new file mode 100644 -index 000000000..b88d47921 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_s.c -@@ -0,0 +1,11 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** test_sme: -+** mov w0, #?1 -+** ret -+*/ -+PROTO (test_sme, int, ()) { return 
__arm_in_streaming_mode (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c -new file mode 100644 -index 000000000..fb3588a64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/arm_in_streaming_mode_sc.c -@@ -0,0 +1,26 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+nosme" -+ -+/* -+** test_nosme: -+** ... -+** bl __arm_sme_state -+** and w0, w0, #?1 -+** ... -+*/ -+PROTO (test_nosme, int, ()) { return __arm_in_streaming_mode (); } -+ -+#pragma GCC target "+sme" -+ -+/* -+** test_sme: -+** mrs x([0-9]+), svcr -+** and w0, w\1, #?1 -+** ret -+*/ -+PROTO (test_sme, int, ()) { return __arm_in_streaming_mode (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c -new file mode 100644 -index 000000000..0a8de45be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_s.c -@@ -0,0 +1,310 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntb_1: -+** cntb x0 -+** ret -+*/ -+PROTO (cntb_1, uint64_t, ()) { return svcntsb (); } -+ -+/* -+** cntb_2: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntb_2, uint64_t, ()) { return svcntsb () * 2; } -+ -+/* -+** cntb_3: -+** cntb x0, all, mul #3 -+** ret -+*/ -+PROTO (cntb_3, uint64_t, ()) { return svcntsb () * 3; } -+ -+/* -+** cntb_4: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntb_4, uint64_t, ()) { return svcntsb () * 4; } -+ -+/* -+** cntb_8: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntb_8, uint64_t, ()) { return svcntsb () * 8; } -+ -+/* -+** cntb_15: -+** cntb x0, all, mul #15 -+** ret -+*/ -+PROTO (cntb_15, uint64_t, ()) { return svcntsb () * 15; } -+ -+/* -+** 
cntb_16: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntb_16, uint64_t, ()) { return svcntsb () * 16; } -+ -+/* -+** cntb_17: -+** rdvl x0, #17 -+** ret -+*/ -+PROTO (cntb_17, uint64_t, ()) { return svcntsb () * 17; } -+ -+/* -+** cntb_31: -+** rdvl x0, #31 -+** ret -+*/ -+PROTO (cntb_31, uint64_t, ()) { return svcntsb () * 31; } -+ -+/* -+** cntb_32: -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 -+** ret -+*/ -+PROTO (cntb_32, uint64_t, ()) { return svcntsb () * 32; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntb_33: -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 -+** incb x0 -+** ret -+*/ -+PROTO (cntb_33, uint64_t, ()) { return svcntsb () * 33; } -+ -+/* -+** cntb_64: -+** cntb (x[0-9]+) -+** lsl x0, \1, 6 -+** ret -+*/ -+PROTO (cntb_64, uint64_t, ()) { return svcntsb () * 64; } -+ -+/* -+** cntb_128: -+** cntb (x[0-9]+) -+** lsl x0, \1, 7 -+** ret -+*/ -+PROTO (cntb_128, uint64_t, ()) { return svcntsb () * 128; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntb_129: -+** cntb (x[0-9]+) -+** lsl x0, \1, 7 -+** incb x0 -+** ret -+*/ -+PROTO (cntb_129, uint64_t, ()) { return svcntsb () * 129; } -+ -+/* -+** cntb_m1: -+** rdvl x0, #-1 -+** ret -+*/ -+PROTO (cntb_m1, uint64_t, ()) { return -svcntsb (); } -+ -+/* -+** cntb_m13: -+** rdvl x0, #-13 -+** ret -+*/ -+PROTO (cntb_m13, uint64_t, ()) { return -svcntsb () * 13; } -+ -+/* -+** cntb_m15: -+** rdvl x0, #-15 -+** ret -+*/ -+PROTO (cntb_m15, uint64_t, ()) { return -svcntsb () * 15; } -+ -+/* -+** cntb_m16: -+** rdvl x0, #-16 -+** ret -+*/ -+PROTO (cntb_m16, uint64_t, ()) { return -svcntsb () * 16; } -+ -+/* -+** cntb_m17: -+** rdvl x0, #-17 -+** ret -+*/ -+PROTO (cntb_m17, uint64_t, ()) { return -svcntsb () * 17; } -+ -+/* -+** cntb_m32: -+** rdvl x0, #-32 -+** ret -+*/ -+PROTO (cntb_m32, uint64_t, ()) { return -svcntsb () * 32; } -+ -+/* -+** cntb_m33: -+** rdvl x0, #-32 -+** decb x0 -+** ret -+*/ -+PROTO (cntb_m33, uint64_t, ()) { return -svcntsb () * 33; } -+ -+/* -+** cntb_m34: -+** rdvl (x[0-9]+), #-17 
-+** lsl x0, \1, #?1 -+** ret -+*/ -+PROTO (cntb_m34, uint64_t, ()) { return -svcntsb () * 34; } -+ -+/* -+** cntb_m64: -+** rdvl (x[0-9]+), #-1 -+** lsl x0, \1, #?6 -+** ret -+*/ -+PROTO (cntb_m64, uint64_t, ()) { return -svcntsb () * 64; } -+ -+/* -+** incb_1: -+** incb x0 -+** ret -+*/ -+PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntsb (); } -+ -+/* -+** incb_2: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 2; } -+ -+/* -+** incb_3: -+** incb x0, all, mul #3 -+** ret -+*/ -+PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 3; } -+ -+/* -+** incb_4: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 4; } -+ -+/* -+** incb_8: -+** incb x0, all, mul #8 -+** ret -+*/ -+PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 8; } -+ -+/* -+** incb_15: -+** incb x0, all, mul #15 -+** ret -+*/ -+PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 15; } -+ -+/* -+** incb_16: -+** incb x0, all, mul #16 -+** ret -+*/ -+PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 16; } -+ -+/* -+** incb_17: -+** addvl x0, x0, #17 -+** ret -+*/ -+PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 17; } -+ -+/* -+** incb_31: -+** addvl x0, x0, #31 -+** ret -+*/ -+PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntsb () * 31; } -+ -+/* -+** decb_1: -+** decb x0 -+** ret -+*/ -+PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntsb (); } -+ -+/* -+** decb_2: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 2; } -+ -+/* -+** decb_3: -+** decb x0, all, mul #3 -+** ret -+*/ -+PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 3; } -+ -+/* -+** decb_4: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 4; } -+ -+/* -+** decb_8: -+** 
decb x0, all, mul #8 -+** ret -+*/ -+PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 8; } -+ -+/* -+** decb_15: -+** decb x0, all, mul #15 -+** ret -+*/ -+PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 15; } -+ -+/* -+** decb_16: -+** decb x0, all, mul #16 -+** ret -+*/ -+PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 16; } -+ -+/* -+** decb_17: -+** addvl x0, x0, #-17 -+** ret -+*/ -+PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 17; } -+ -+/* -+** decb_31: -+** addvl x0, x0, #-31 -+** ret -+*/ -+PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 31; } -+ -+/* -+** decb_32: -+** addvl x0, x0, #-32 -+** ret -+*/ -+PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntsb () * 32; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c -new file mode 100644 -index 000000000..9ee4c8afc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsb_sc.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntsb: -+** rdsvl x0, #1 -+** ret -+*/ -+PROTO (cntsb, uint64_t, ()) { return svcntsb (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c -new file mode 100644 -index 000000000..3bf9498e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_s.c -@@ -0,0 +1,277 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntd_1: -+** cntd x0 -+** ret -+*/ -+PROTO (cntd_1, uint64_t, ()) { return svcntsd (); } -+ -+/* -+** cntd_2: -+** cntw x0 -+** ret -+*/ -+PROTO (cntd_2, uint64_t, ()) { return svcntsd () * 2; } -+ -+/* -+** cntd_3: -+** cntd x0, all, mul #3 -+** ret -+*/ 
-+PROTO (cntd_3, uint64_t, ()) { return svcntsd () * 3; } -+ -+/* -+** cntd_4: -+** cnth x0 -+** ret -+*/ -+PROTO (cntd_4, uint64_t, ()) { return svcntsd () * 4; } -+ -+/* -+** cntd_8: -+** cntb x0 -+** ret -+*/ -+PROTO (cntd_8, uint64_t, ()) { return svcntsd () * 8; } -+ -+/* -+** cntd_15: -+** cntd x0, all, mul #15 -+** ret -+*/ -+PROTO (cntd_15, uint64_t, ()) { return svcntsd () * 15; } -+ -+/* -+** cntd_16: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntd_16, uint64_t, ()) { return svcntsd () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntd_17: -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 3 -+** ret -+*/ -+PROTO (cntd_17, uint64_t, ()) { return svcntsd () * 17; } -+ -+/* -+** cntd_32: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntd_32, uint64_t, ()) { return svcntsd () * 32; } -+ -+/* -+** cntd_64: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntd_64, uint64_t, ()) { return svcntsd () * 64; } -+ -+/* -+** cntd_128: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntd_128, uint64_t, ()) { return svcntsd () * 128; } -+ -+/* -+** cntd_m1: -+** cntd (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m1, uint64_t, ()) { return -svcntsd (); } -+ -+/* -+** cntd_m13: -+** cntd (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m13, uint64_t, ()) { return -svcntsd () * 13; } -+ -+/* -+** cntd_m15: -+** cntd (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m15, uint64_t, ()) { return -svcntsd () * 15; } -+ -+/* -+** cntd_m16: -+** rdvl x0, #-2 -+** ret -+*/ -+PROTO (cntd_m16, uint64_t, ()) { return -svcntsd () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cntd_m17: -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 3 -+** ret -+*/ -+PROTO (cntd_m17, uint64_t, ()) { return -svcntsd () * 17; } -+ -+/* -+** incd_1: -+** incd x0 -+** ret -+*/ -+PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntsd (); } -+ -+/* -+** incd_2: -+** incw x0 -+** ret -+*/ -+PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 2; } -+ -+/* -+** incd_3: -+** incd x0, all, mul #3 -+** ret -+*/ -+PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 3; } -+ -+/* -+** incd_4: -+** inch x0 -+** ret -+*/ -+PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 4; } -+ -+/* -+** incd_7: -+** incd x0, all, mul #7 -+** ret -+*/ -+PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 7; } -+ -+/* -+** incd_8: -+** incb x0 -+** ret -+*/ -+PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 8; } -+ -+/* -+** incd_9: -+** incd x0, all, mul #9 -+** ret -+*/ -+PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 9; } -+ -+/* -+** incd_15: -+** incd x0, all, mul #15 -+** ret -+*/ -+PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 15; } -+ -+/* -+** incd_16: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 16; } -+ -+/* -+** incd_18: -+** incw x0, all, mul #9 -+** ret -+*/ -+PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 18; } -+ -+/* -+** incd_30: -+** incw x0, all, mul #15 -+** ret -+*/ -+PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntsd () * 30; } -+ -+/* -+** decd_1: -+** decd x0 -+** ret -+*/ -+PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntsd (); } -+ -+/* -+** decd_2: -+** decw x0 -+** ret -+*/ -+PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 2; } -+ -+/* -+** decd_3: -+** decd x0, all, mul #3 -+** ret -+*/ -+PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 3; } -+ -+/* -+** decd_4: -+** dech x0 
-+** ret -+*/ -+PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 4; } -+ -+/* -+** decd_7: -+** decd x0, all, mul #7 -+** ret -+*/ -+PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 7; } -+ -+/* -+** decd_8: -+** decb x0 -+** ret -+*/ -+PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 8; } -+ -+/* -+** decd_9: -+** decd x0, all, mul #9 -+** ret -+*/ -+PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 9; } -+ -+/* -+** decd_15: -+** decd x0, all, mul #15 -+** ret -+*/ -+PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 15; } -+ -+/* -+** decd_16: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 16; } -+ -+/* -+** decd_18: -+** decw x0, all, mul #9 -+** ret -+*/ -+PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 18; } -+ -+/* -+** decd_30: -+** decw x0, all, mul #15 -+** ret -+*/ -+PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntsd () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c -new file mode 100644 -index 000000000..90fb374ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsd_sc.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntsd: -+** rdsvl (x[0-9])+, #1 -+** lsr x0, \1, #?3 -+** ret -+*/ -+PROTO (cntsd, uint64_t, ()) { return svcntsd (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c -new file mode 100644 -index 000000000..021c39a14 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_s.c -@@ -0,0 +1,279 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" 
-+ -+/* -+** cnth_1: -+** cnth x0 -+** ret -+*/ -+PROTO (cnth_1, uint64_t, ()) { return svcntsh (); } -+ -+/* -+** cnth_2: -+** cntb x0 -+** ret -+*/ -+PROTO (cnth_2, uint64_t, ()) { return svcntsh () * 2; } -+ -+/* -+** cnth_3: -+** cnth x0, all, mul #3 -+** ret -+*/ -+PROTO (cnth_3, uint64_t, ()) { return svcntsh () * 3; } -+ -+/* -+** cnth_4: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cnth_4, uint64_t, ()) { return svcntsh () * 4; } -+ -+/* -+** cnth_8: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cnth_8, uint64_t, ()) { return svcntsh () * 8; } -+ -+/* -+** cnth_15: -+** cnth x0, all, mul #15 -+** ret -+*/ -+PROTO (cnth_15, uint64_t, ()) { return svcntsh () * 15; } -+ -+/* -+** cnth_16: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cnth_16, uint64_t, ()) { return svcntsh () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cnth_17: -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 1 -+** ret -+*/ -+PROTO (cnth_17, uint64_t, ()) { return svcntsh () * 17; } -+ -+/* -+** cnth_32: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cnth_32, uint64_t, ()) { return svcntsh () * 32; } -+ -+/* -+** cnth_64: -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 -+** ret -+*/ -+PROTO (cnth_64, uint64_t, ()) { return svcntsh () * 64; } -+ -+/* -+** cnth_128: -+** cntb (x[0-9]+) -+** lsl x0, \1, 6 -+** ret -+*/ -+PROTO (cnth_128, uint64_t, ()) { return svcntsh () * 128; } -+ -+/* -+** cnth_m1: -+** cnth (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m1, uint64_t, ()) { return -svcntsh (); } -+ -+/* -+** cnth_m13: -+** cnth (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m13, uint64_t, ()) { return -svcntsh () * 13; } -+ -+/* -+** cnth_m15: -+** cnth (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m15, uint64_t, ()) { return -svcntsh () * 15; } -+ -+/* -+** cnth_m16: -+** rdvl x0, #-8 -+** ret -+*/ -+PROTO (cnth_m16, uint64_t, ()) { return -svcntsh () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cnth_m17: -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 1 -+** ret -+*/ -+PROTO (cnth_m17, uint64_t, ()) { return -svcntsh () * 17; } -+ -+/* -+** inch_1: -+** inch x0 -+** ret -+*/ -+PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcntsh (); } -+ -+/* -+** inch_2: -+** incb x0 -+** ret -+*/ -+PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 2; } -+ -+/* -+** inch_3: -+** inch x0, all, mul #3 -+** ret -+*/ -+PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 3; } -+ -+/* -+** inch_4: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 4; } -+ -+/* -+** inch_7: -+** inch x0, all, mul #7 -+** ret -+*/ -+PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 7; } -+ -+/* -+** inch_8: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 8; } -+ -+/* -+** inch_9: -+** inch x0, all, mul #9 -+** ret -+*/ -+PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 9; } -+ -+/* -+** inch_15: -+** inch x0, all, mul #15 -+** ret -+*/ -+PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 15; } -+ -+/* -+** inch_16: -+** incb x0, all, mul #8 -+** ret -+*/ -+PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 16; } -+ -+/* -+** inch_18: -+** incb x0, all, mul #9 -+** ret -+*/ -+PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 18; } -+ -+/* -+** inch_30: -+** incb x0, all, mul #15 -+** ret -+*/ -+PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcntsh () * 30; } -+ -+/* -+** dech_1: -+** dech x0 -+** ret -+*/ -+PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcntsh (); } -+ -+/* -+** dech_2: -+** decb x0 -+** ret -+*/ -+PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 2; } -+ -+/* -+** dech_3: -+** dech x0, all, mul #3 -+** ret -+*/ -+PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 3; } -+ -+/* 
-+** dech_4: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 4; } -+ -+/* -+** dech_7: -+** dech x0, all, mul #7 -+** ret -+*/ -+PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 7; } -+ -+/* -+** dech_8: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 8; } -+ -+/* -+** dech_9: -+** dech x0, all, mul #9 -+** ret -+*/ -+PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 9; } -+ -+/* -+** dech_15: -+** dech x0, all, mul #15 -+** ret -+*/ -+PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 15; } -+ -+/* -+** dech_16: -+** decb x0, all, mul #8 -+** ret -+*/ -+PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 16; } -+ -+/* -+** dech_18: -+** decb x0, all, mul #9 -+** ret -+*/ -+PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 18; } -+ -+/* -+** dech_30: -+** decb x0, all, mul #15 -+** ret -+*/ -+PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcntsh () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c -new file mode 100644 -index 000000000..9f6c85208 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsh_sc.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntsh: -+** rdsvl (x[0-9])+, #1 -+** lsr x0, \1, #?1 -+** ret -+*/ -+PROTO (cntsh, uint64_t, ()) { return svcntsh (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c -new file mode 100644 -index 000000000..c421e1b8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_s.c -@@ -0,0 +1,278 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntw_1: -+** cntw x0 -+** ret -+*/ -+PROTO (cntw_1, uint64_t, ()) { return svcntsw (); } -+ -+/* -+** cntw_2: -+** cnth x0 -+** ret -+*/ -+PROTO (cntw_2, uint64_t, ()) { return svcntsw () * 2; } -+ -+/* -+** cntw_3: -+** cntw x0, all, mul #3 -+** ret -+*/ -+PROTO (cntw_3, uint64_t, ()) { return svcntsw () * 3; } -+ -+/* -+** cntw_4: -+** cntb x0 -+** ret -+*/ -+PROTO (cntw_4, uint64_t, ()) { return svcntsw () * 4; } -+ -+/* -+** cntw_8: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntw_8, uint64_t, ()) { return svcntsw () * 8; } -+ -+/* -+** cntw_15: -+** cntw x0, all, mul #15 -+** ret -+*/ -+PROTO (cntw_15, uint64_t, ()) { return svcntsw () * 15; } -+ -+/* -+** cntw_16: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntw_16, uint64_t, ()) { return svcntsw () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntw_17: -+** rdvl (x[0-9]+), #17 -+** asr x0, \1, 2 -+** ret -+*/ -+PROTO (cntw_17, uint64_t, ()) { return svcntsw () * 17; } -+ -+/* -+** cntw_32: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntw_32, uint64_t, ()) { return svcntsw () * 32; } -+ -+/* -+** cntw_64: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntw_64, uint64_t, ()) { return svcntsw () * 64; } -+ -+/* -+** cntw_128: -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 -+** ret -+*/ -+PROTO (cntw_128, uint64_t, ()) { return svcntsw () * 128; } -+ -+/* -+** cntw_m1: -+** cntw (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m1, uint64_t, ()) { return -svcntsw (); } -+ -+/* -+** cntw_m13: -+** cntw (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m13, uint64_t, ()) { return -svcntsw () * 13; } -+ -+/* -+** cntw_m15: -+** cntw (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m15, uint64_t, ()) { return -svcntsw () * 15; } -+ -+/* -+** cntw_m16: -+** rdvl (x[0-9]+), #-4 -+** ret -+*/ -+PROTO (cntw_m16, uint64_t, ()) { return -svcntsw () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cntw_m17: -+** rdvl (x[0-9]+), #-17 -+** asr x0, \1, 2 -+** ret -+*/ -+PROTO (cntw_m17, uint64_t, ()) { return -svcntsw () * 17; } -+ -+/* -+** incw_1: -+** incw x0 -+** ret -+*/ -+PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntsw (); } -+ -+/* -+** incw_2: -+** inch x0 -+** ret -+*/ -+PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 2; } -+ -+/* -+** incw_3: -+** incw x0, all, mul #3 -+** ret -+*/ -+PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 3; } -+ -+/* -+** incw_4: -+** incb x0 -+** ret -+*/ -+PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 4; } -+ -+/* -+** incw_7: -+** incw x0, all, mul #7 -+** ret -+*/ -+PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 7; } -+ -+/* -+** incw_8: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 8; } -+ -+/* -+** incw_9: -+** incw x0, all, mul #9 -+** ret -+*/ -+PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 9; } -+ -+/* -+** incw_15: -+** incw x0, all, mul #15 -+** ret -+*/ -+PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 15; } -+ -+/* -+** incw_16: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 16; } -+ -+/* -+** incw_18: -+** inch x0, all, mul #9 -+** ret -+*/ -+PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 18; } -+ -+/* -+** incw_30: -+** inch x0, all, mul #15 -+** ret -+*/ -+PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntsw () * 30; } -+ -+/* -+** decw_1: -+** decw x0 -+** ret -+*/ -+PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntsw (); } -+ -+/* -+** decw_2: -+** dech x0 -+** ret -+*/ -+PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 2; } -+ -+/* -+** decw_3: -+** decw x0, all, mul #3 -+** ret -+*/ -+PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 3; } -+ -+/* -+** decw_4: 
-+** decb x0 -+** ret -+*/ -+PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 4; } -+ -+/* -+** decw_7: -+** decw x0, all, mul #7 -+** ret -+*/ -+PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 7; } -+ -+/* -+** decw_8: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 8; } -+ -+/* -+** decw_9: -+** decw x0, all, mul #9 -+** ret -+*/ -+PROTO (decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 9; } -+ -+/* -+** decw_15: -+** decw x0, all, mul #15 -+** ret -+*/ -+PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 15; } -+ -+/* -+** decw_16: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 16; } -+ -+/* -+** decw_18: -+** dech x0, all, mul #9 -+** ret -+*/ -+PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 18; } -+ -+/* -+** decw_30: -+** dech x0, all, mul #15 -+** ret -+*/ -+PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntsw () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c -new file mode 100644 -index 000000000..75ca937c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/cntsw_sc.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#define NO_SHARED_ZA -+#include "test_sme_acle.h" -+ -+/* -+** cntsw: -+** rdsvl (x[0-9])+, #1 -+** lsr x0, \1, #?2 -+** ret -+*/ -+PROTO (cntsw, uint64_t, ()) { return svcntsw (); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c -new file mode 100644 -index 000000000..fbbeb4f12 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za128.c -@@ -0,0 +1,77 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } 
*/ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za128_0_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_0_0_0, -+ svld1_hor_vnum_za128 (0, 0, p0, x1, 0), -+ svld1_hor_vnum_za128 (0, 0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_7_1_0: -+** mov (w1[2-5]), #?1 -+** ld1q { za7h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_7_1_0, -+ svld1_hor_vnum_za128 (7, 1, p0, x1, 0), -+ svld1_hor_vnum_za128 (7, 1, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_11_1_5: -+** incb x1, all, mul #5 -+** mov (w1[2-5]), #?6 -+** ld1q { za11h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_11_1_5, -+ svld1_hor_vnum_za128 (11, 1, p0, x1, 5), -+ svld1_hor_vnum_za128 (11, 1, p0, x1, 5)) -+ -+/* -+** ld1_vnum_za128_3_w0_0: -+** mov (w1[2-5]), w0 -+** ld1q { za3h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0, -+ svld1_hor_vnum_za128 (3, w0, p0, x1, 0), -+ svld1_hor_vnum_za128 (3, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_5_w0_0: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1q { za5h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0, -+ svld1_hor_vnum_za128 (5, w0, p0, x1, 13), -+ svld1_hor_vnum_za128 (5, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za128_11_w0_0: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1q { za11h\.q\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0, -+ svld1_hor_vnum_za128 (11, w0, p0, x1, x2), -+ svld1_hor_vnum_za128 (11, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za128_15_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** ld1q { za15h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0, -+ svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0), -+ svld1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c -new file mode 100644 -index 000000000..30e7a71ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za16_1_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_0_1, -+ svld1_hor_vnum_za16 (1, 0, p0, x1, 1), -+ svld1_hor_vnum_za16 (1, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_1_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_1_1, -+ svld1_hor_vnum_za16 (1, 1, p0, x1, 1), -+ svld1_hor_vnum_za16 (1, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_0_0_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?8 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_0_8, -+ svld1_hor_vnum_za16 (0, 0, p0, x1, 8), -+ svld1_hor_vnum_za16 (0, 0, p0, x1, 8)) -+ -+/* -+** ld1_vnum_za16_0_1_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?9 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, -+ svld1_hor_vnum_za16 (0, 1, p0, x1, 8), -+ svld1_hor_vnum_za16 (0, 1, p0, x1, 8)) -+ -+/* -+** ld1_vnum_za16_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 0), -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za16_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 1), -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_0_w0_7: -+** incb x1, all, mul #7 -+** mov 
(w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 7), -+ svld1_hor_vnum_za16 (0, w0, p0, x1, 7)) -+ -+/* -+** ld1_vnum_za16_1_w0_8: -+** incb x1, all, mul #8 -+** add (w1[2-5]), w0, #?8 -+** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, -+ svld1_hor_vnum_za16 (1, w0, p0, x1, 8), -+ svld1_hor_vnum_za16 (1, w0, p0, x1, 8)) -+ -+/* -+** ld1_vnum_za16_1_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, -+ svld1_hor_vnum_za16 (1, w0, p0, x1, 13), -+ svld1_hor_vnum_za16 (1, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za16_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1h { za0h\.h\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, -+ svld1_hor_vnum_za16 (0, w0, p0, x1, x2), -+ svld1_hor_vnum_za16 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za16_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, -+ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0), -+ svld1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c -new file mode 100644 -index 000000000..49ffaede8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za32_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1w { za3h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, -+ svld1_hor_vnum_za32 (3, 0, p0, x1, 1), -+ svld1_hor_vnum_za32 (3, 0, p0, x1, 1)) -+ -+/* 
-+** ld1_vnum_za32_2_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1w { za2h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, -+ svld1_hor_vnum_za32 (2, 1, p0, x1, 1), -+ svld1_hor_vnum_za32 (2, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za32_0_0_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?4 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, -+ svld1_hor_vnum_za32 (0, 0, p0, x1, 4), -+ svld1_hor_vnum_za32 (0, 0, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_2_1_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?5 -+** ld1w { za2h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, -+ svld1_hor_vnum_za32 (2, 1, p0, x1, 4), -+ svld1_hor_vnum_za32 (2, 1, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 0), -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za32_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 1), -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za32_0_w0_3: -+** incb x1, all, mul #3 -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 3), -+ svld1_hor_vnum_za32 (0, w0, p0, x1, 3)) -+ -+/* -+** ld1_vnum_za32_1_w0_4: -+** incb x1, all, mul #4 -+** add (w1[2-5]), w0, #?4 -+** ld1w { za1h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, -+ svld1_hor_vnum_za32 (1, w0, p0, x1, 4), -+ svld1_hor_vnum_za32 (1, w0, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_3_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1w { za3h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, -+ 
svld1_hor_vnum_za32 (3, w0, p0, x1, 13), -+ svld1_hor_vnum_za32 (3, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za32_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1w { za0h\.s\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, -+ svld1_hor_vnum_za32 (0, w0, p0, x1, x2), -+ svld1_hor_vnum_za32 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za32_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1w { za1h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, -+ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0), -+ svld1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c -new file mode 100644 -index 000000000..df09b1c81 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za64.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za64_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1d { za3h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, -+ svld1_hor_vnum_za64 (3, 0, p0, x1, 1), -+ svld1_hor_vnum_za64 (3, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_7_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1d { za7h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, -+ svld1_hor_vnum_za64 (7, 1, p0, x1, 1), -+ svld1_hor_vnum_za64 (7, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_0_0_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?2 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, -+ svld1_hor_vnum_za64 (0, 0, p0, x1, 2), -+ svld1_hor_vnum_za64 (0, 0, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_5_1_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?3 -+** ld1d { za5h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA 
(ld1_vnum_za64_5_1_2, -+ svld1_hor_vnum_za64 (5, 1, p0, x1, 2), -+ svld1_hor_vnum_za64 (5, 1, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, -+ svld1_hor_vnum_za64 (0, w0, p0, x1, 0), -+ svld1_hor_vnum_za64 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za64_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, -+ svld1_hor_vnum_za64 (0, w0, p0, x1, 1), -+ svld1_hor_vnum_za64 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_6_w0_2: -+** incb x1, all, mul #2 -+** add (w1[2-5]), w0, #?2 -+** ld1d { za6h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, -+ svld1_hor_vnum_za64 (6, w0, p0, x1, 2), -+ svld1_hor_vnum_za64 (6, w0, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_2_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1d { za2h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, -+ svld1_hor_vnum_za64 (2, w0, p0, x1, 13), -+ svld1_hor_vnum_za64 (2, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za64_4_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1d { za4h\.d\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, -+ svld1_hor_vnum_za64 (4, w0, p0, x1, x2), -+ svld1_hor_vnum_za64 (4, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za64_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1d { za1h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, -+ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0), -+ svld1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c -new file mode 100644 -index 000000000..c42931d3e ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_vnum_za8.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za8_0_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, -+ svld1_hor_vnum_za8 (0, 0, p0, x1, 1), -+ svld1_hor_vnum_za8 (0, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, -+ svld1_hor_vnum_za8 (0, 1, p0, x1, 1), -+ svld1_hor_vnum_za8 (0, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_0_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?16 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, -+ svld1_hor_vnum_za8 (0, 0, p0, x1, 16), -+ svld1_hor_vnum_za8 (0, 0, p0, x1, 16)) -+ -+/* -+** ld1_vnum_za8_0_1_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?17 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, -+ svld1_hor_vnum_za8 (0, 1, p0, x1, 16), -+ svld1_hor_vnum_za8 (0, 1, p0, x1, 16)) -+ -+/* -+** ld1_vnum_za8_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 0), -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za8_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 1), -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_w0_15: -+** incb x1, all, mul #15 -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 15), -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 15)) -+ -+/* -+** 
ld1_vnum_za8_0_w0_16: -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 16), -+ svld1_hor_vnum_za8 (0, w0, p0, x1, 16)) -+ -+/* -+** ld1_vnum_za8_0_w0_x2: -+** cntb (x[0-9]+) -+** mul (x[0-9]+), (?:\1, x2|x2, \1) -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1b { za0h\.b\[\3, 0\] }, p0/z, \[x1, \2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, -+ svld1_hor_vnum_za8 (0, w0, p0, x1, x2), -+ svld1_hor_vnum_za8 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za8_0_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, -+ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0), -+ svld1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c -new file mode 100644 -index 000000000..2c6292217 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za128.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za128_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_0, -+ svld1_hor_za128 (0, 0, p0, x1), -+ svld1_hor_za128 (0, 0, p0, x1)) -+ -+/* -+** ld1_za128_0_1: -+** mov (w1[2-5]), #?1 -+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_1, -+ svld1_hor_za128 (0, 1, p0, x1), -+ svld1_hor_za128 (0, 1, p0, x1)) -+ -+/* -+** ld1_za128_0_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_w0, -+ svld1_hor_za128 (0, w0, p0, x1), -+ svld1_hor_za128 (0, w0, p0, x1)) -+ -+/* -+** ld1_za128_0_w0_p1: -+** add (w1[2-5]), w0, #?1 -+** ld1q { za0h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ 
-+TEST_LOAD_ZA (ld1_za128_0_w0_p1, -+ svld1_hor_za128 (0, w0 + 1, p0, x1), -+ svld1_hor_za128 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za128_7_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za7h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_7_w0, -+ svld1_hor_za128 (7, w0, p0, x1), -+ svld1_hor_za128 (7, w0, p0, x1)) -+ -+/* -+** ld1_za128_13_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za13h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_13_w0, -+ svld1_hor_za128 (13, w0, p0, x1), -+ svld1_hor_za128 (13, w0, p0, x1)) -+ -+/* -+** ld1_za128_15_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za15h\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_15_w0, -+ svld1_hor_za128 (15, w0, p0, x1), -+ svld1_hor_za128 (15, w0, p0, x1)) -+ -+/* -+** ld1_za128_9_w0_index: -+** mov (w1[2-5]), w0 -+** ld1q { za9h\.q\[\1, 0\] }, p0/z, \[x1, x2, lsl #?4\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_9_w0_index, -+ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16), -+ svld1_hor_za128 (9, w0, p0, x1 + x2 * 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c -new file mode 100644 -index 000000000..3570bea61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za16_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_0, -+ svld1_hor_za16 (0, 0, p0, x1), -+ svld1_hor_za16 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 7. 
*/ -+/* -+** ld1_za16_0_7: -+** mov (w1[2-5]), #?7 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_7, -+ svld1_hor_za16 (0, 7, p0, x1), -+ svld1_hor_za16 (0, 7, p0, x1)) -+ -+/* -+** ld1_za16_0_8: -+** mov (w1[2-5]), #?8 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_8, -+ svld1_hor_za16 (0, 8, p0, x1), -+ svld1_hor_za16 (0, 8, p0, x1)) -+ -+/* -+** ld1_za16_0_w0: -+** mov (w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0, -+ svld1_hor_za16 (0, w0, p0, x1), -+ svld1_hor_za16 (0, w0, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p1, -+ svld1_hor_za16 (0, w0 + 1, p0, x1), -+ svld1_hor_za16 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_p7: -+** mov (w1[2-5]), w0 -+** ld1h { za0h\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p7, -+ svld1_hor_za16 (0, w0 + 7, p0, x1), -+ svld1_hor_za16 (0, w0 + 7, p0, x1)) -+ -+/* -+** ld1_za16_1_w0: -+** mov (w1[2-5]), w0 -+** ld1h { za1h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0, -+ svld1_hor_za16 (1, w0, p0, x1), -+ svld1_hor_za16 (1, w0, p0, x1)) -+ -+ -+/* -+** ld1_za16_1_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1h { za1h\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p1, -+ svld1_hor_za16 (1, w0 + 1, p0, x1), -+ svld1_hor_za16 (1, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za16_1_w0_p7: -+** mov (w1[2-5]), w0 -+** ld1h { za1h\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p7, -+ svld1_hor_za16 (1, w0 + 7, p0, x1), -+ svld1_hor_za16 (1, w0 + 7, p0, x1)) -+ -+/* -+** ld1_za16_1_w0_p5_index: -+** mov (w1[2-5]), w0 -+** ld1h { za1h\.h\[\1, 5\] }, p0/z, \[x1, x2, lsl #?1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p5_index, -+ svld1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2), -+ svld1_hor_za16 (1, w0 + 5, p0, x1 + 
x2 * 2)) -+ -+/* -+** ld1_za16_0_w0_p8: -+** add (w1[2-5]), w0, #?8 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p8, -+ svld1_hor_za16 (0, w0 + 8, p0, x1), -+ svld1_hor_za16 (0, w0 + 8, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1h { za0h\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_m1, -+ svld1_hor_za16 (0, w0 - 1, p0, x1), -+ svld1_hor_za16 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c -new file mode 100644 -index 000000000..a8f6606bd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za32.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za32_0_0: -+** mov (w1[2-5]), (?:w0|#?0) -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_0, -+ svld1_hor_za32 (0, 0, p0, x1), -+ svld1_hor_za32 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 3. 
*/ -+/* -+** ld1_za32_0_3: -+** mov (w1[2-5]), #?3 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_3, -+ svld1_hor_za32 (0, 3, p0, x1), -+ svld1_hor_za32 (0, 3, p0, x1)) -+ -+/* -+** ld1_za32_0_4: -+** mov (w1[2-5]), #?4 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_4, -+ svld1_hor_za32 (0, 4, p0, x1), -+ svld1_hor_za32 (0, 4, p0, x1)) -+ -+/* -+** ld1_za32_0_w0: -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0, -+ svld1_hor_za32 (0, w0, p0, x1), -+ svld1_hor_za32 (0, w0, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p1, -+ svld1_hor_za32 (0, w0 + 1, p0, x1), -+ svld1_hor_za32 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_p3: -+** mov (w1[2-5]), w0 -+** ld1w { za0h\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p3, -+ svld1_hor_za32 (0, w0 + 3, p0, x1), -+ svld1_hor_za32 (0, w0 + 3, p0, x1)) -+ -+/* -+** ld1_za32_3_w0: -+** mov (w1[2-5]), w0 -+** ld1w { za3h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0, -+ svld1_hor_za32 (3, w0, p0, x1), -+ svld1_hor_za32 (3, w0, p0, x1)) -+ -+/* -+** ld1_za32_3_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1w { za3h\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0_p1, -+ svld1_hor_za32 (3, w0 + 1, p0, x1), -+ svld1_hor_za32 (3, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za32_3_w0_p3: -+** mov (w1[2-5]), w0 -+** ld1w { za3h\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0_p3, -+ svld1_hor_za32 (3, w0 + 3, p0, x1), -+ svld1_hor_za32 (3, w0 + 3, p0, x1)) -+ -+/* -+** ld1_za32_1_w0_p2_index: -+** mov (w1[2-5]), w0 -+** ld1w { za1h\.s\[\1, 2\] }, p0/z, \[x1, x2, lsl #?2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, -+ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4), -+ svld1_hor_za32 (1, w0 + 2, p0, x1 + x2 
* 4)) -+ -+/* -+** ld1_za32_0_w0_p4: -+** add (w1[2-5]), w0, #?4 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p4, -+ svld1_hor_za32 (0, w0 + 4, p0, x1), -+ svld1_hor_za32 (0, w0 + 4, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1w { za0h\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_m1, -+ svld1_hor_za32 (0, w0 - 1, p0, x1), -+ svld1_hor_za32 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c -new file mode 100644 -index 000000000..f4573eb71 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za64.c -@@ -0,0 +1,105 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za64_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_0, -+ svld1_hor_za64 (0, 0, p0, x1), -+ svld1_hor_za64 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 1. 
*/ -+/* -+** ld1_za64_0_1: -+** mov (w1[2-5]), #?1 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_1, -+ svld1_hor_za64 (0, 1, p0, x1), -+ svld1_hor_za64 (0, 1, p0, x1)) -+ -+/* -+** ld1_za64_0_2: -+** mov (w1[2-5]), #?2 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_2, -+ svld1_hor_za64 (0, 2, p0, x1), -+ svld1_hor_za64 (0, 2, p0, x1)) -+ -+/* -+** ld1_za64_0_w0: -+** mov (w1[2-5]), w0 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0, -+ svld1_hor_za64 (0, w0, p0, x1), -+ svld1_hor_za64 (0, w0, p0, x1)) -+ -+/* -+** ld1_za64_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1d { za0h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_p1, -+ svld1_hor_za64 (0, w0 + 1, p0, x1), -+ svld1_hor_za64 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za64_7_w0: -+** mov (w1[2-5]), w0 -+** ld1d { za7h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_7_w0, -+ svld1_hor_za64 (7, w0, p0, x1), -+ svld1_hor_za64 (7, w0, p0, x1)) -+ -+/* -+** ld1_za64_7_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1d { za7h\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_7_w0_p1, -+ svld1_hor_za64 (7, w0 + 1, p0, x1), -+ svld1_hor_za64 (7, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za64_5_w0_p1_index: -+** mov (w1[2-5]), w0 -+** ld1d { za5h\.d\[\1, 1\] }, p0/z, \[x1, x2, lsl #?3\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, -+ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8), -+ svld1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8)) -+ -+/* -+** ld1_za64_0_w0_p2: -+** add (w1[2-5]), w0, #?2 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_p2, -+ svld1_hor_za64 (0, w0 + 2, p0, x1), -+ svld1_hor_za64 (0, w0 + 2, p0, x1)) -+ -+/* -+** ld1_za64_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1d { za0h\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_m1, -+ svld1_hor_za64 (0, w0 - 1, p0, x1), -+ svld1_hor_za64 (0, 
w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c -new file mode 100644 -index 000000000..eef0927cd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_hor_za8.c -@@ -0,0 +1,95 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za8_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_0, -+ svld1_hor_za8 (0, 0, p0, x1), -+ svld1_hor_za8 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 15. */ -+/* -+** ld1_za8_0_15: -+** mov (w1[2-5]), #?15 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_15, -+ svld1_hor_za8 (0, 15, p0, x1), -+ svld1_hor_za8 (0, 15, p0, x1)) -+ -+/* -+** ld1_za8_0_16: -+** mov (w1[2-5]), #?16 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_16, -+ svld1_hor_za8 (0, 16, p0, x1), -+ svld1_hor_za8 (0, 16, p0, x1)) -+ -+/* -+** ld1_za8_0_w0: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0, -+ svld1_hor_za8 (0, w0, p0, x1), -+ svld1_hor_za8 (0, w0, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p1, -+ svld1_hor_za8 (0, w0 + 1, p0, x1), -+ svld1_hor_za8 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p15: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p15, -+ svld1_hor_za8 (0, w0 + 15, p0, x1), -+ svld1_hor_za8 (0, w0 + 15, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p13_index: -+** mov (w1[2-5]), w0 -+** ld1b { za0h\.b\[\1, 15\] }, p0/z, \[x1, x2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, -+ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2), 
-+ svld1_hor_za8 (0, w0 + 15, p0, x1 + x2)) -+ -+/* -+** ld1_za8_0_w0_p16: -+** add (w1[2-5]), w0, #?16 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p16, -+ svld1_hor_za8 (0, w0 + 16, p0, x1), -+ svld1_hor_za8 (0, w0 + 16, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1b { za0h\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_m1, -+ svld1_hor_za8 (0, w0 - 1, p0, x1), -+ svld1_hor_za8 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c -new file mode 100644 -index 000000000..e90da4b33 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za128.c -@@ -0,0 +1,77 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za128_0_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_0_0_0, -+ svld1_ver_vnum_za128 (0, 0, p0, x1, 0), -+ svld1_ver_vnum_za128 (0, 0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_7_1_0: -+** mov (w1[2-5]), #?1 -+** ld1q { za7v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_7_1_0, -+ svld1_ver_vnum_za128 (7, 1, p0, x1, 0), -+ svld1_ver_vnum_za128 (7, 1, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_11_1_5: -+** incb x1, all, mul #5 -+** mov (w1[2-5]), #?6 -+** ld1q { za11v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_11_1_5, -+ svld1_ver_vnum_za128 (11, 1, p0, x1, 5), -+ svld1_ver_vnum_za128 (11, 1, p0, x1, 5)) -+ -+/* -+** ld1_vnum_za128_3_w0_0: -+** mov (w1[2-5]), w0 -+** ld1q { za3v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_3_w0_0, -+ svld1_ver_vnum_za128 (3, w0, p0, x1, 0), -+ svld1_ver_vnum_za128 (3, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za128_5_w0_0: -+** incb x1, all, mul #13 -+** add 
(w1[2-5]), w0, #?13 -+** ld1q { za5v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_5_w0_0, -+ svld1_ver_vnum_za128 (5, w0, p0, x1, 13), -+ svld1_ver_vnum_za128 (5, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za128_11_w0_0: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1q { za11v\.q\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_11_w0_0, -+ svld1_ver_vnum_za128 (11, w0, p0, x1, x2), -+ svld1_ver_vnum_za128 (11, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za128_15_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** ld1q { za15v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za128_15_w0p1_0, -+ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0), -+ svld1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c -new file mode 100644 -index 000000000..7868cf4ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za16_1_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_0_1, -+ svld1_ver_vnum_za16 (1, 0, p0, x1, 1), -+ svld1_ver_vnum_za16 (1, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_1_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_1_1, -+ svld1_ver_vnum_za16 (1, 1, p0, x1, 1), -+ svld1_ver_vnum_za16 (1, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_0_0_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?8 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_0_8, -+ svld1_ver_vnum_za16 (0, 0, p0, x1, 8), -+ svld1_ver_vnum_za16 (0, 0, p0, x1, 8)) -+ -+/* -+** 
ld1_vnum_za16_0_1_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?9 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_1_8, -+ svld1_ver_vnum_za16 (0, 1, p0, x1, 8), -+ svld1_ver_vnum_za16 (0, 1, p0, x1, 8)) -+ -+/* -+** ld1_vnum_za16_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_0, -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 0), -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za16_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_1, -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 1), -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za16_0_w0_7: -+** incb x1, all, mul #7 -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_7, -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 7), -+ svld1_ver_vnum_za16 (0, w0, p0, x1, 7)) -+ -+/* -+** ld1_vnum_za16_1_w0_8: -+** incb x1, all, mul #8 -+** add (w1[2-5]), w0, #?8 -+** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0_8, -+ svld1_ver_vnum_za16 (1, w0, p0, x1, 8), -+ svld1_ver_vnum_za16 (1, w0, p0, x1, 8)) -+ -+/* -+** ld1_vnum_za16_1_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0_13, -+ svld1_ver_vnum_za16 (1, w0, p0, x1, 13), -+ svld1_ver_vnum_za16 (1, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za16_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1h { za0v\.h\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_0_w0_x2, -+ svld1_ver_vnum_za16 (0, w0, p0, x1, x2), -+ svld1_ver_vnum_za16 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za16_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret 
-+*/ -+TEST_LOAD_ZA (ld1_vnum_za16_1_w0p1_0, -+ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0), -+ svld1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c -new file mode 100644 -index 000000000..053b60140 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za32_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1w { za3v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_3_0_1, -+ svld1_ver_vnum_za32 (3, 0, p0, x1, 1), -+ svld1_ver_vnum_za32 (3, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za32_2_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1w { za2v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_2_1_1, -+ svld1_ver_vnum_za32 (2, 1, p0, x1, 1), -+ svld1_ver_vnum_za32 (2, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za32_0_0_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?4 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_0_4, -+ svld1_ver_vnum_za32 (0, 0, p0, x1, 4), -+ svld1_ver_vnum_za32 (0, 0, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_2_1_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?5 -+** ld1w { za2v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_2_1_4, -+ svld1_ver_vnum_za32 (2, 1, p0, x1, 4), -+ svld1_ver_vnum_za32 (2, 1, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_0, -+ svld1_ver_vnum_za32 (0, w0, p0, x1, 0), -+ svld1_ver_vnum_za32 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za32_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_1, -+ 
svld1_ver_vnum_za32 (0, w0, p0, x1, 1), -+ svld1_ver_vnum_za32 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za32_0_w0_3: -+** incb x1, all, mul #3 -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_3, -+ svld1_ver_vnum_za32 (0, w0, p0, x1, 3), -+ svld1_ver_vnum_za32 (0, w0, p0, x1, 3)) -+ -+/* -+** ld1_vnum_za32_1_w0_4: -+** incb x1, all, mul #4 -+** add (w1[2-5]), w0, #?4 -+** ld1w { za1v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_1_w0_4, -+ svld1_ver_vnum_za32 (1, w0, p0, x1, 4), -+ svld1_ver_vnum_za32 (1, w0, p0, x1, 4)) -+ -+/* -+** ld1_vnum_za32_3_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1w { za3v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_3_w0_13, -+ svld1_ver_vnum_za32 (3, w0, p0, x1, 13), -+ svld1_ver_vnum_za32 (3, w0, p0, x1, 13)) -+ -+/* -+** ld1_vnum_za32_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1w { za0v\.s\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_0_w0_x2, -+ svld1_ver_vnum_za32 (0, w0, p0, x1, x2), -+ svld1_ver_vnum_za32 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za32_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1w { za1v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za32_1_w0p1_0, -+ svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0), -+ svld1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c -new file mode 100644 -index 000000000..d04764979 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za64.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za64_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1d { za3v\.d\[\1, 1\] }, p0/z, 
\[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_3_0_1, -+ svld1_ver_vnum_za64 (3, 0, p0, x1, 1), -+ svld1_ver_vnum_za64 (3, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_7_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1d { za7v\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_7_1_1, -+ svld1_ver_vnum_za64 (7, 1, p0, x1, 1), -+ svld1_ver_vnum_za64 (7, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_0_0_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?2 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_0_2, -+ svld1_ver_vnum_za64 (0, 0, p0, x1, 2), -+ svld1_ver_vnum_za64 (0, 0, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_5_1_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?3 -+** ld1d { za5v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_5_1_2, -+ svld1_ver_vnum_za64 (5, 1, p0, x1, 2), -+ svld1_ver_vnum_za64 (5, 1, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_w0_0, -+ svld1_ver_vnum_za64 (0, w0, p0, x1, 0), -+ svld1_ver_vnum_za64 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za64_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1d { za0v\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_0_w0_1, -+ svld1_ver_vnum_za64 (0, w0, p0, x1, 1), -+ svld1_ver_vnum_za64 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za64_6_w0_2: -+** incb x1, all, mul #2 -+** add (w1[2-5]), w0, #?2 -+** ld1d { za6v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_6_w0_2, -+ svld1_ver_vnum_za64 (6, w0, p0, x1, 2), -+ svld1_ver_vnum_za64 (6, w0, p0, x1, 2)) -+ -+/* -+** ld1_vnum_za64_2_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** ld1d { za2v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_2_w0_13, -+ svld1_ver_vnum_za64 (2, w0, p0, x1, 13), -+ svld1_ver_vnum_za64 (2, w0, p0, x1, 13)) -+ -+/* -+** 
ld1_vnum_za64_4_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1d { za4v\.d\[\3, 0\] }, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_4_w0_x2, -+ svld1_ver_vnum_za64 (4, w0, p0, x1, x2), -+ svld1_ver_vnum_za64 (4, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za64_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1d { za1v\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za64_1_w0p1_0, -+ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0), -+ svld1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c -new file mode 100644 -index 000000000..e99d95e3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_vnum_za8.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_vnum_za8_0_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_0_1, -+ svld1_ver_vnum_za8 (0, 0, p0, x1, 1), -+ svld1_ver_vnum_za8 (0, 0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_1_1, -+ svld1_ver_vnum_za8 (0, 1, p0, x1, 1), -+ svld1_ver_vnum_za8 (0, 1, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_0_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?16 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_0_16, -+ svld1_ver_vnum_za8 (0, 0, p0, x1, 16), -+ svld1_ver_vnum_za8 (0, 0, p0, x1, 16)) -+ -+/* -+** ld1_vnum_za8_0_1_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?17 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_1_16, -+ svld1_ver_vnum_za8 (0, 1, p0, x1, 16), -+ svld1_ver_vnum_za8 (0, 1, p0, x1, 16)) 
-+ -+/* -+** ld1_vnum_za8_0_w0_0: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_0, -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 0), -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 0)) -+ -+/* -+** ld1_vnum_za8_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_1, -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 1), -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 1)) -+ -+/* -+** ld1_vnum_za8_0_w0_15: -+** incb x1, all, mul #15 -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_15, -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 15), -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 15)) -+ -+/* -+** ld1_vnum_za8_0_w0_16: -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_16, -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 16), -+ svld1_ver_vnum_za8 (0, w0, p0, x1, 16)) -+ -+/* -+** ld1_vnum_za8_0_w0_x2: -+** cntb (x[0-9]+) -+** mul (x[0-9]+), (?:\1, x2|x2, \1) -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** ld1b { za0v\.b\[\3, 0\] }, p0/z, \[x1, \2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0_x2, -+ svld1_ver_vnum_za8 (0, w0, p0, x1, x2), -+ svld1_ver_vnum_za8 (0, w0, p0, x1, x2)) -+ -+/* -+** ld1_vnum_za8_0_w0p1_0: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_vnum_za8_0_w0p1_0, -+ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0), -+ svld1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c -new file mode 100644 -index 000000000..e81f40258 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za128.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* 
-+** ld1_za128_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_0, -+ svld1_ver_za128 (0, 0, p0, x1), -+ svld1_ver_za128 (0, 0, p0, x1)) -+ -+/* -+** ld1_za128_0_1: -+** mov (w1[2-5]), #?1 -+** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_1, -+ svld1_ver_za128 (0, 1, p0, x1), -+ svld1_ver_za128 (0, 1, p0, x1)) -+ -+/* -+** ld1_za128_0_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_w0, -+ svld1_ver_za128 (0, w0, p0, x1), -+ svld1_ver_za128 (0, w0, p0, x1)) -+ -+/* -+** ld1_za128_0_w0_p1: -+** add (w1[2-5]), w0, #?1 -+** ld1q { za0v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_0_w0_p1, -+ svld1_ver_za128 (0, w0 + 1, p0, x1), -+ svld1_ver_za128 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za128_7_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za7v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_7_w0, -+ svld1_ver_za128 (7, w0, p0, x1), -+ svld1_ver_za128 (7, w0, p0, x1)) -+ -+/* -+** ld1_za128_13_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za13v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_13_w0, -+ svld1_ver_za128 (13, w0, p0, x1), -+ svld1_ver_za128 (13, w0, p0, x1)) -+ -+/* -+** ld1_za128_15_w0: -+** mov (w1[2-5]), w0 -+** ld1q { za15v\.q\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_15_w0, -+ svld1_ver_za128 (15, w0, p0, x1), -+ svld1_ver_za128 (15, w0, p0, x1)) -+ -+/* -+** ld1_za128_9_w0_index: -+** mov (w1[2-5]), w0 -+** ld1q { za9v\.q\[\1, 0\] }, p0/z, \[x1, x2, lsl #?4\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za128_9_w0_index, -+ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16), -+ svld1_ver_za128 (9, w0, p0, x1 + x2 * 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c -new file mode 100644 -index 000000000..0938b1eba ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za16_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_0, -+ svld1_ver_za16 (0, 0, p0, x1), -+ svld1_ver_za16 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 7. */ -+/* -+** ld1_za16_0_7: -+** mov (w1[2-5]), #?7 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_7, -+ svld1_ver_za16 (0, 7, p0, x1), -+ svld1_ver_za16 (0, 7, p0, x1)) -+ -+/* -+** ld1_za16_0_8: -+** mov (w1[2-5]), #?8 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_8, -+ svld1_ver_za16 (0, 8, p0, x1), -+ svld1_ver_za16 (0, 8, p0, x1)) -+ -+/* -+** ld1_za16_0_w0: -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0, -+ svld1_ver_za16 (0, w0, p0, x1), -+ svld1_ver_za16 (0, w0, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p1, -+ svld1_ver_za16 (0, w0 + 1, p0, x1), -+ svld1_ver_za16 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_p7: -+** mov (w1[2-5]), w0 -+** ld1h { za0v\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p7, -+ svld1_ver_za16 (0, w0 + 7, p0, x1), -+ svld1_ver_za16 (0, w0 + 7, p0, x1)) -+ -+/* -+** ld1_za16_1_w0: -+** mov (w1[2-5]), w0 -+** ld1h { za1v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0, -+ svld1_ver_za16 (1, w0, p0, x1), -+ svld1_ver_za16 (1, w0, p0, x1)) -+ -+ -+/* -+** ld1_za16_1_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1h { za1v\.h\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p1, -+ svld1_ver_za16 (1, w0 + 1, p0, x1), -+ svld1_ver_za16 (1, 
w0 + 1, p0, x1)) -+ -+/* -+** ld1_za16_1_w0_p7: -+** mov (w1[2-5]), w0 -+** ld1h { za1v\.h\[\1, 7\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p7, -+ svld1_ver_za16 (1, w0 + 7, p0, x1), -+ svld1_ver_za16 (1, w0 + 7, p0, x1)) -+ -+/* -+** ld1_za16_1_w0_p5_index: -+** mov (w1[2-5]), w0 -+** ld1h { za1v\.h\[\1, 5\] }, p0/z, \[x1, x2, lsl #?1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_1_w0_p5_index, -+ svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2), -+ svld1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2)) -+ -+/* -+** ld1_za16_0_w0_p8: -+** add (w1[2-5]), w0, #?8 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_p8, -+ svld1_ver_za16 (0, w0 + 8, p0, x1), -+ svld1_ver_za16 (0, w0 + 8, p0, x1)) -+ -+/* -+** ld1_za16_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1h { za0v\.h\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za16_0_w0_m1, -+ svld1_ver_za16 (0, w0 - 1, p0, x1), -+ svld1_ver_za16 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c -new file mode 100644 -index 000000000..bb9d93184 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za32.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za32_0_0: -+** mov (w1[2-5]), (?:w0|#?0) -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_0, -+ svld1_ver_za32 (0, 0, p0, x1), -+ svld1_ver_za32 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 3. 
*/ -+/* -+** ld1_za32_0_3: -+** mov (w1[2-5]), #?3 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_3, -+ svld1_ver_za32 (0, 3, p0, x1), -+ svld1_ver_za32 (0, 3, p0, x1)) -+ -+/* -+** ld1_za32_0_4: -+** mov (w1[2-5]), #?4 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_4, -+ svld1_ver_za32 (0, 4, p0, x1), -+ svld1_ver_za32 (0, 4, p0, x1)) -+ -+/* -+** ld1_za32_0_w0: -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0, -+ svld1_ver_za32 (0, w0, p0, x1), -+ svld1_ver_za32 (0, w0, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p1, -+ svld1_ver_za32 (0, w0 + 1, p0, x1), -+ svld1_ver_za32 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_p3: -+** mov (w1[2-5]), w0 -+** ld1w { za0v\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p3, -+ svld1_ver_za32 (0, w0 + 3, p0, x1), -+ svld1_ver_za32 (0, w0 + 3, p0, x1)) -+ -+/* -+** ld1_za32_3_w0: -+** mov (w1[2-5]), w0 -+** ld1w { za3v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0, -+ svld1_ver_za32 (3, w0, p0, x1), -+ svld1_ver_za32 (3, w0, p0, x1)) -+ -+/* -+** ld1_za32_3_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1w { za3v\.s\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0_p1, -+ svld1_ver_za32 (3, w0 + 1, p0, x1), -+ svld1_ver_za32 (3, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za32_3_w0_p3: -+** mov (w1[2-5]), w0 -+** ld1w { za3v\.s\[\1, 3\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_3_w0_p3, -+ svld1_ver_za32 (3, w0 + 3, p0, x1), -+ svld1_ver_za32 (3, w0 + 3, p0, x1)) -+ -+/* -+** ld1_za32_1_w0_p2_index: -+** mov (w1[2-5]), w0 -+** ld1w { za1v\.s\[\1, 2\] }, p0/z, \[x1, x2, lsl #?2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_1_w0_p2_index, -+ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4), -+ svld1_ver_za32 (1, w0 + 2, p0, x1 + x2 
* 4)) -+ -+/* -+** ld1_za32_0_w0_p4: -+** add (w1[2-5]), w0, #?4 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_p4, -+ svld1_ver_za32 (0, w0 + 4, p0, x1), -+ svld1_ver_za32 (0, w0 + 4, p0, x1)) -+ -+/* -+** ld1_za32_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1w { za0v\.s\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za32_0_w0_m1, -+ svld1_ver_za32 (0, w0 - 1, p0, x1), -+ svld1_ver_za32 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c -new file mode 100644 -index 000000000..58d73ad06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za64.c -@@ -0,0 +1,105 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za64_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_0, -+ svld1_ver_za64 (0, 0, p0, x1), -+ svld1_ver_za64 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 1. 
*/ -+/* -+** ld1_za64_0_1: -+** mov (w1[2-5]), #?1 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_1, -+ svld1_ver_za64 (0, 1, p0, x1), -+ svld1_ver_za64 (0, 1, p0, x1)) -+ -+/* -+** ld1_za64_0_2: -+** mov (w1[2-5]), #?2 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_2, -+ svld1_ver_za64 (0, 2, p0, x1), -+ svld1_ver_za64 (0, 2, p0, x1)) -+ -+/* -+** ld1_za64_0_w0: -+** mov (w1[2-5]), w0 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0, -+ svld1_ver_za64 (0, w0, p0, x1), -+ svld1_ver_za64 (0, w0, p0, x1)) -+ -+/* -+** ld1_za64_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1d { za0v\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_p1, -+ svld1_ver_za64 (0, w0 + 1, p0, x1), -+ svld1_ver_za64 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za64_7_w0: -+** mov (w1[2-5]), w0 -+** ld1d { za7v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_7_w0, -+ svld1_ver_za64 (7, w0, p0, x1), -+ svld1_ver_za64 (7, w0, p0, x1)) -+ -+/* -+** ld1_za64_7_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1d { za7v\.d\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_7_w0_p1, -+ svld1_ver_za64 (7, w0 + 1, p0, x1), -+ svld1_ver_za64 (7, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za64_5_w0_p1_index: -+** mov (w1[2-5]), w0 -+** ld1d { za5v\.d\[\1, 1\] }, p0/z, \[x1, x2, lsl #?3\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_5_w0_p1_index, -+ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8), -+ svld1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8)) -+ -+/* -+** ld1_za64_0_w0_p2: -+** add (w1[2-5]), w0, #?2 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_p2, -+ svld1_ver_za64 (0, w0 + 2, p0, x1), -+ svld1_ver_za64 (0, w0 + 2, p0, x1)) -+ -+/* -+** ld1_za64_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1d { za0v\.d\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za64_0_w0_m1, -+ svld1_ver_za64 (0, w0 - 1, p0, x1), -+ svld1_ver_za64 (0, 
w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c -new file mode 100644 -index 000000000..38211b211 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ld1_ver_za8.c -@@ -0,0 +1,95 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ld1_za8_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_0, -+ svld1_ver_za8 (0, 0, p0, x1), -+ svld1_ver_za8 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 15. */ -+/* -+** ld1_za8_0_15: -+** mov (w1[2-5]), #?15 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_15, -+ svld1_ver_za8 (0, 15, p0, x1), -+ svld1_ver_za8 (0, 15, p0, x1)) -+ -+/* -+** ld1_za8_0_16: -+** mov (w1[2-5]), #?16 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_16, -+ svld1_ver_za8 (0, 16, p0, x1), -+ svld1_ver_za8 (0, 16, p0, x1)) -+ -+/* -+** ld1_za8_0_w0: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0, -+ svld1_ver_za8 (0, w0, p0, x1), -+ svld1_ver_za8 (0, w0, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p1: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 1\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p1, -+ svld1_ver_za8 (0, w0 + 1, p0, x1), -+ svld1_ver_za8 (0, w0 + 1, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p15: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p15, -+ svld1_ver_za8 (0, w0 + 15, p0, x1), -+ svld1_ver_za8 (0, w0 + 15, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_p13_index: -+** mov (w1[2-5]), w0 -+** ld1b { za0v\.b\[\1, 15\] }, p0/z, \[x1, x2\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p13_index, -+ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2), 
-+ svld1_ver_za8 (0, w0 + 15, p0, x1 + x2)) -+ -+/* -+** ld1_za8_0_w0_p16: -+** add (w1[2-5]), w0, #?16 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_p16, -+ svld1_ver_za8 (0, w0 + 16, p0, x1), -+ svld1_ver_za8 (0, w0 + 16, p0, x1)) -+ -+/* -+** ld1_za8_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** ld1b { za0v\.b\[\1, 0\] }, p0/z, \[x1\] -+** ret -+*/ -+TEST_LOAD_ZA (ld1_za8_0_w0_m1, -+ svld1_ver_za8 (0, w0 - 1, p0, x1), -+ svld1_ver_za8 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c -new file mode 100644 -index 000000000..90495d080 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_s.c -@@ -0,0 +1,147 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ldr_vnum_za_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_0_0, -+ svldr_vnum_za (0, x1, 0), -+ svldr_vnum_za (0, x1, 0)) -+ -+/* -+** ldr_vnum_za_0_1: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_0_1, -+ svldr_vnum_za (0, x1, 1), -+ svldr_vnum_za (0, x1, 1)) -+ -+/* -+** ldr_vnum_za_1_0: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_1_0, -+ svldr_vnum_za (1, x1, 0), -+ svldr_vnum_za (1, x1, 0)) -+ -+/* -+** ldr_vnum_za_1_2: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_1_2, -+ svldr_vnum_za (1, x1, 2), -+ svldr_vnum_za (1, x1, 2)) -+ -+/* -+** ldr_vnum_za_w0_0: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_0, -+ svldr_vnum_za (w0, x1, 0), -+ svldr_vnum_za (w0, x1, 0)) -+ -+/* -+** ldr_vnum_za_w0_1: -+** mov (w1[2-5]), w0 -+** 
ldr za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_1, -+ svldr_vnum_za (w0, x1, 1), -+ svldr_vnum_za (w0, x1, 1)) -+ -+/* -+** ldr_vnum_za_w0_13: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_13, -+ svldr_vnum_za (w0, x1, 13), -+ svldr_vnum_za (w0, x1, 13)) -+ -+/* -+** ldr_vnum_za_w0_15: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_15, -+ svldr_vnum_za (w0, x1, 15), -+ svldr_vnum_za (w0, x1, 15)) -+ -+/* -+** ldr_vnum_za_w0_16: -+** ( -+** add (w1[2-5]), w0, #?16 -+** incb x1, all, mul #16 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_16, -+ svldr_vnum_za (w0, x1, 16), -+ svldr_vnum_za (w0, x1, 16)) -+ -+/* -+** ldr_vnum_za_w0_m1: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** decb x1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** decb x1 -+** sub (w1[2-5]), w0, #?1 -+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_m1, -+ svldr_vnum_za (w0, x1, -1), -+ svldr_vnum_za (w0, x1, -1)) -+ -+/* -+** ldr_vnum_za_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, -+ svldr_vnum_za (w0 + 1, x1, 0), -+ svldr_vnum_za (w0 + 1, x1, 0)) -+ -+/* -+** ldr_vnum_za_w0m1_1: -+** sub (w1[2-5]), w0, #?1 -+** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, -+ svldr_vnum_za (w0 - 1, x1, 1), -+ svldr_vnum_za (w0 - 1, x1, 1)) -+ -+/* -+** ldr_vnum_za_w0p2_3: -+** add (w1[2-5]), w0, #?2 -+** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, -+ svldr_vnum_za (w0 + 2, x1, 3), -+ svldr_vnum_za (w0 + 2, x1, 3)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c -new file mode 100644 -index 000000000..dfc2d139f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_vnum_za_sc.c -@@ -0,0 +1,148 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** ldr_vnum_za_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_0_0, -+ svldr_vnum_za (0, x1, 0), -+ svldr_vnum_za (0, x1, 0)) -+ -+/* -+** ldr_vnum_za_0_1: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_0_1, -+ svldr_vnum_za (0, x1, 1), -+ svldr_vnum_za (0, x1, 1)) -+ -+/* -+** ldr_vnum_za_1_0: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_1_0, -+ svldr_vnum_za (1, x1, 0), -+ svldr_vnum_za (1, x1, 0)) -+ -+/* -+** ldr_vnum_za_1_2: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 2\], \[x1(?:, #2, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_1_2, -+ svldr_vnum_za (1, x1, 2), -+ svldr_vnum_za (1, x1, 2)) -+ -+/* -+** ldr_vnum_za_w0_0: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_0, -+ svldr_vnum_za (w0, x1, 0), -+ svldr_vnum_za (w0, x1, 0)) -+ -+/* -+** ldr_vnum_za_w0_1: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_1, -+ svldr_vnum_za (w0, x1, 1), -+ svldr_vnum_za (w0, x1, 1)) -+ -+/* -+** ldr_vnum_za_w0_13: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_13, -+ svldr_vnum_za (w0, x1, 13), -+ svldr_vnum_za (w0, x1, 13)) -+ -+/* -+** ldr_vnum_za_w0_15: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA 
(ldr_vnum_za_w0_15, -+ svldr_vnum_za (w0, x1, 15), -+ svldr_vnum_za (w0, x1, 15)) -+ -+/* -+** ldr_vnum_za_w0_16: -+** ( -+** add (w1[2-5]), w0, #?16 -+** addsvl (x[0-9]+), x1, #16 -+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** addsvl (x[0-9]+), x1, #16 -+** add (w1[2-5]), w0, #?16 -+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_16, -+ svldr_vnum_za (w0, x1, 16), -+ svldr_vnum_za (w0, x1, 16)) -+ -+/* -+** ldr_vnum_za_w0_m1: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** addsvl (x[0-9]+), x1, #-1 -+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** addsvl (x[0-9]+), x1, #-1 -+** sub (w1[2-5]), w0, #?1 -+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0_m1, -+ svldr_vnum_za (w0, x1, -1), -+ svldr_vnum_za (w0, x1, -1)) -+ -+/* -+** ldr_vnum_za_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0p1_0, -+ svldr_vnum_za (w0 + 1, x1, 0), -+ svldr_vnum_za (w0 + 1, x1, 0)) -+ -+/* -+** ldr_vnum_za_w0m1_1: -+** sub (w1[2-5]), w0, #?1 -+** ldr za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0m1_1, -+ svldr_vnum_za (w0 - 1, x1, 1), -+ svldr_vnum_za (w0 - 1, x1, 1)) -+ -+/* -+** ldr_vnum_za_w0p2_3: -+** add (w1[2-5]), w0, #?2 -+** ldr za\[\1, 3\], \[x1(?:, #3, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_vnum_za_w0p2_3, -+ svldr_vnum_za (w0 + 2, x1, 3), -+ svldr_vnum_za (w0 + 2, x1, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c -new file mode 100644 -index 000000000..313b3239a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_s.c -@@ -0,0 +1,124 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** ldr_za_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret 
-+*/ -+TEST_LOAD_ZA (ldr_za_0, -+ svldr_za (0, x1), -+ svldr_za (0, x1)) -+ -+/* -+** ldr_za_1: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_1, -+ svldr_za (1, x1), -+ svldr_za (1, x1)) -+ -+/* -+** ldr_za_w0: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0, -+ svldr_za (w0, x1), -+ svldr_za (w0, x1)) -+ -+/* -+** ldr_za_w0_1_vnum: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_1_vnum, -+ svldr_za (w0 + 1, x1 + svcntsb ()), -+ svldr_za (w0 + 1, x1 + svcntsb ())) -+ -+/* -+** ldr_za_w0_13_vnum: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_13_vnum, -+ svldr_za (w0 + 13, x1 + svcntsb () * 13), -+ svldr_za (w0 + 13, x1 + svcntsb () * 13)) -+ -+/* -+** ldr_za_w0_15_vnum: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_15_vnum, -+ svldr_za (w0 + 15, x1 + svcntsb () * 15), -+ svldr_za (w0 + 15, x1 + svcntsb () * 15)) -+ -+/* -+** ldr_za_w0_16_vnum: -+** ( -+** add (w1[2-5]), w0, #?16 -+** incb x1, all, mul #16 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_16_vnum, -+ svldr_za (w0 + 16, x1 + svcntsb () * 16), -+ svldr_za (w0 + 16, x1 + svcntsb () * 16)) -+ -+/* -+** ldr_za_w0_m1_vnum: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** decb x1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** decb x1 -+** sub (w1[2-5]), w0, #?1 -+** ldr za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_m1_vnum, -+ svldr_za (w0 - 1, x1 - svcntsb ()), -+ svldr_za (w0 - 1, x1 - svcntsb ())) -+ -+/* -+** ldr_za_w0p2: -+** add (w1[2-5]), w0, #?2 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA 
(ldr_za_w0p2, -+ svldr_za (w0 + 2, x1), -+ svldr_za (w0 + 2, x1)) -+ -+/* -+** ldr_za_offset: -+** ( -+** mov (w1[2-5]), w0 -+** add (x[0-9]+), x1, #?1 -+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** add (x[0-9]+), x1, #?1 -+** mov (w1[2-5]), w0 -+** ldr za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_offset, -+ svldr_za (w0, x1 + 1), -+ svldr_za (w0, x1 + 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c -new file mode 100644 -index 000000000..a27be7671 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/ldr_za_sc.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** ldr_za_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_0, -+ svldr_za (0, x1), -+ svldr_za (0, x1)) -+ -+/* -+** ldr_za_1: -+** mov (w1[2-5]), #?1 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_1, -+ svldr_za (1, x1), -+ svldr_za (1, x1)) -+ -+/* -+** ldr_za_w0: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0, -+ svldr_za (w0, x1), -+ svldr_za (w0, x1)) -+ -+/* -+** ldr_za_w0_1_vnum: -+** mov (w1[2-5]), w0 -+** ldr za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0_1_vnum, -+ svldr_za (w0 + 1, x1 + svcntsb ()), -+ svldr_za (w0 + 1, x1 + svcntsb ())) -+ -+/* -+** ldr_za_w0p2: -+** add (w1[2-5]), w0, #?2 -+** ldr za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_w0p2, -+ svldr_za (w0 + 2, x1), -+ svldr_za (w0 + 2, x1)) -+ -+/* -+** ldr_za_offset: -+** ( -+** mov (w1[2-5]), w0 -+** add (x[0-9]+), x1, #?1 -+** ldr za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** add (x[0-9]+), x1, #?1 -+** mov (w1[2-5]), w0 -+** ldr za\[\4, 0\], \[\3(?:, #0, mul 
vl)?\] -+** ) -+** ret -+*/ -+TEST_LOAD_ZA (ldr_za_offset, -+ svldr_za (w0, x1 + 1), -+ svldr_za (w0, x1 + 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c -new file mode 100644 -index 000000000..480de2c7f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za32.c -@@ -0,0 +1,102 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** mopa_za32_s8_0_p0_p1_z0_z1: -+** smopa za0\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_s8_0_p0_p1_z0_z1, svint8_t, -+ svmopa_za32_s8_m (0, p0, p1, z0, z1), -+ svmopa_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_s8_0_p1_p0_z1_z0: -+** smopa za0\.s, p1/m, p0/m, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_s8_0_p1_p0_z1_z0, svint8_t, -+ svmopa_za32_s8_m (0, p1, p0, z1, z0), -+ svmopa_za32_m (0, p1, p0, z1, z0)) -+ -+/* -+** mopa_za32_s8_3_p0_p1_z0_z1: -+** smopa za3\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_s8_3_p0_p1_z0_z1, svint8_t, -+ svmopa_za32_s8_m (3, p0, p1, z0, z1), -+ svmopa_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_u8_0_p0_p1_z0_z1: -+** umopa za0\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_u8_0_p0_p1_z0_z1, svuint8_t, -+ svmopa_za32_u8_m (0, p0, p1, z0, z1), -+ svmopa_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_u8_3_p0_p1_z0_z1: -+** umopa za3\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_u8_3_p0_p1_z0_z1, svuint8_t, -+ svmopa_za32_u8_m (3, p0, p1, z0, z1), -+ svmopa_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_bf16_0_p0_p1_z0_z1: -+** bfmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t, -+ svmopa_za32_bf16_m (0, p0, p1, z0, z1), -+ svmopa_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_bf16_3_p0_p1_z0_z1: -+** bfmopa za3\.s, p0/m, p1/m, z0\.h, 
z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t, -+ svmopa_za32_bf16_m (3, p0, p1, z0, z1), -+ svmopa_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_f16_0_p0_p1_z0_z1: -+** fmopa za0\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_f16_0_p0_p1_z0_z1, svfloat16_t, -+ svmopa_za32_f16_m (0, p0, p1, z0, z1), -+ svmopa_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_f16_3_p0_p1_z0_z1: -+** fmopa za3\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_f16_3_p0_p1_z0_z1, svfloat16_t, -+ svmopa_za32_f16_m (3, p0, p1, z0, z1), -+ svmopa_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_f32_0_p0_p1_z0_z1: -+** fmopa za0\.s, p0/m, p1/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_f32_0_p0_p1_z0_z1, svfloat32_t, -+ svmopa_za32_f32_m (0, p0, p1, z0, z1), -+ svmopa_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za32_f32_3_p0_p1_z0_z1: -+** fmopa za3\.s, p0/m, p1/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za32_f32_3_p0_p1_z0_z1, svfloat32_t, -+ svmopa_za32_f32_m (3, p0, p1, z0, z1), -+ svmopa_za32_m (3, p0, p1, z0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c -new file mode 100644 -index 000000000..f523b9605 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mopa_za64.c -@@ -0,0 +1,70 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** mopa_za64_s16_0_p0_p1_z0_z1: -+** smopa za0\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_s16_0_p0_p1_z0_z1, svint16_t, -+ svmopa_za64_s16_m (0, p0, p1, z0, z1), -+ svmopa_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za64_s16_0_p1_p0_z1_z0: -+** smopa za0\.d, p1/m, p0/m, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_s16_0_p1_p0_z1_z0, svint16_t, -+ svmopa_za64_s16_m (0, p1, p0, z1, z0), -+ 
svmopa_za64_m (0, p1, p0, z1, z0)) -+ -+/* -+** mopa_za64_s16_7_p0_p1_z0_z1: -+** smopa za7\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_s16_7_p0_p1_z0_z1, svint16_t, -+ svmopa_za64_s16_m (7, p0, p1, z0, z1), -+ svmopa_za64_m (7, p0, p1, z0, z1)) -+ -+/* -+** mopa_za64_u16_0_p0_p1_z0_z1: -+** umopa za0\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_u16_0_p0_p1_z0_z1, svuint16_t, -+ svmopa_za64_u16_m (0, p0, p1, z0, z1), -+ svmopa_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za64_u16_7_p0_p1_z0_z1: -+** umopa za7\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_u16_7_p0_p1_z0_z1, svuint16_t, -+ svmopa_za64_u16_m (7, p0, p1, z0, z1), -+ svmopa_za64_m (7, p0, p1, z0, z1)) -+ -+#pragma GCC target "+nosme-i16i64+sme-f64f64" -+ -+/* -+** mopa_za64_f64_0_p0_p1_z0_z1: -+** fmopa za0\.d, p0/m, p1/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_f64_0_p0_p1_z0_z1, svfloat64_t, -+ svmopa_za64_f64_m (0, p0, p1, z0, z1), -+ svmopa_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mopa_za64_f64_7_p0_p1_z0_z1: -+** fmopa za7\.d, p0/m, p1/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (mopa_za64_f64_7_p0_p1_z0_z1, svfloat64_t, -+ svmopa_za64_f64_m (7, p0, p1, z0, z1), -+ svmopa_za64_m (7, p0, p1, z0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c -new file mode 100644 -index 000000000..63c2b80fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za32.c -@@ -0,0 +1,102 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** mops_za32_s8_0_p0_p1_z0_z1: -+** smops za0\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_s8_0_p0_p1_z0_z1, svint8_t, -+ svmops_za32_s8_m (0, p0, p1, z0, z1), -+ svmops_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_s8_0_p1_p0_z1_z0: -+** smops za0\.s, p1/m, p0/m, z1\.b, z0\.b -+** ret 
-+*/ -+TEST_UNIFORM_ZA (mops_za32_s8_0_p1_p0_z1_z0, svint8_t, -+ svmops_za32_s8_m (0, p1, p0, z1, z0), -+ svmops_za32_m (0, p1, p0, z1, z0)) -+ -+/* -+** mops_za32_s8_3_p0_p1_z0_z1: -+** smops za3\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_s8_3_p0_p1_z0_z1, svint8_t, -+ svmops_za32_s8_m (3, p0, p1, z0, z1), -+ svmops_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_u8_0_p0_p1_z0_z1: -+** umops za0\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_u8_0_p0_p1_z0_z1, svuint8_t, -+ svmops_za32_u8_m (0, p0, p1, z0, z1), -+ svmops_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_u8_3_p0_p1_z0_z1: -+** umops za3\.s, p0/m, p1/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_u8_3_p0_p1_z0_z1, svuint8_t, -+ svmops_za32_u8_m (3, p0, p1, z0, z1), -+ svmops_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_bf16_0_p0_p1_z0_z1: -+** bfmops za0\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_bf16_0_p0_p1_z0_z1, svbfloat16_t, -+ svmops_za32_bf16_m (0, p0, p1, z0, z1), -+ svmops_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_bf16_3_p0_p1_z0_z1: -+** bfmops za3\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_bf16_3_p0_p1_z0_z1, svbfloat16_t, -+ svmops_za32_bf16_m (3, p0, p1, z0, z1), -+ svmops_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_f16_0_p0_p1_z0_z1: -+** fmops za0\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_f16_0_p0_p1_z0_z1, svfloat16_t, -+ svmops_za32_f16_m (0, p0, p1, z0, z1), -+ svmops_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_f16_3_p0_p1_z0_z1: -+** fmops za3\.s, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_f16_3_p0_p1_z0_z1, svfloat16_t, -+ svmops_za32_f16_m (3, p0, p1, z0, z1), -+ svmops_za32_m (3, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_f32_0_p0_p1_z0_z1: -+** fmops za0\.s, p0/m, p1/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_f32_0_p0_p1_z0_z1, svfloat32_t, -+ 
svmops_za32_f32_m (0, p0, p1, z0, z1), -+ svmops_za32_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za32_f32_3_p0_p1_z0_z1: -+** fmops za3\.s, p0/m, p1/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za32_f32_3_p0_p1_z0_z1, svfloat32_t, -+ svmops_za32_f32_m (3, p0, p1, z0, z1), -+ svmops_za32_m (3, p0, p1, z0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c -new file mode 100644 -index 000000000..bc04c3cf7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/mops_za64.c -@@ -0,0 +1,70 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** mops_za64_s16_0_p0_p1_z0_z1: -+** smops za0\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_s16_0_p0_p1_z0_z1, svint16_t, -+ svmops_za64_s16_m (0, p0, p1, z0, z1), -+ svmops_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za64_s16_0_p1_p0_z1_z0: -+** smops za0\.d, p1/m, p0/m, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_s16_0_p1_p0_z1_z0, svint16_t, -+ svmops_za64_s16_m (0, p1, p0, z1, z0), -+ svmops_za64_m (0, p1, p0, z1, z0)) -+ -+/* -+** mops_za64_s16_7_p0_p1_z0_z1: -+** smops za7\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_s16_7_p0_p1_z0_z1, svint16_t, -+ svmops_za64_s16_m (7, p0, p1, z0, z1), -+ svmops_za64_m (7, p0, p1, z0, z1)) -+ -+/* -+** mops_za64_u16_0_p0_p1_z0_z1: -+** umops za0\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_u16_0_p0_p1_z0_z1, svuint16_t, -+ svmops_za64_u16_m (0, p0, p1, z0, z1), -+ svmops_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za64_u16_7_p0_p1_z0_z1: -+** umops za7\.d, p0/m, p1/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_u16_7_p0_p1_z0_z1, svuint16_t, -+ svmops_za64_u16_m (7, p0, p1, z0, z1), -+ svmops_za64_m (7, p0, p1, z0, z1)) -+ -+#pragma GCC target "+nosme-i16i64+sme-f64f64" -+ -+/* -+** 
mops_za64_f64_0_p0_p1_z0_z1: -+** fmops za0\.d, p0/m, p1/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_f64_0_p0_p1_z0_z1, svfloat64_t, -+ svmops_za64_f64_m (0, p0, p1, z0, z1), -+ svmops_za64_m (0, p0, p1, z0, z1)) -+ -+/* -+** mops_za64_f64_7_p0_p1_z0_z1: -+** fmops za7\.d, p0/m, p1/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZA (mops_za64_f64_7_p0_p1_z0_z1, svfloat64_t, -+ svmops_za64_f64_m (7, p0, p1, z0, z1), -+ svmops_za64_m (7, p0, p1, z0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c -new file mode 100644 -index 000000000..c8eef3b16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za128.c -@@ -0,0 +1,435 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za128_s8_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 0, 0), -+ z0 = svread_hor_za128_m (z0, p0, 0, 0)) -+ -+/* -+** read_za128_s8_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 0, 1), -+ z0 = svread_hor_za128_m (z0, p0, 0, 1)) -+ -+/* -+** read_za128_s8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s8_0_w0p1_tied: -+** add (w1[2-5]), w0, #?1 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 + 1), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za128_s8_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.q, 
p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 0, w0 - 1), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za128_s8_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za1h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 1, w0), -+ z0 = svread_hor_za128_m (z0, p0, 1, w0)) -+ -+/* -+** read_za128_s8_15_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za15h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z0, p0, 15, w0), -+ z0 = svread_hor_za128_m (z0, p0, 15, w0)) -+ -+/* -+** read_za128_s8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t, -+ z0 = svread_hor_za128_s8_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, -+ z0 = svread_hor_za128_u8_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u8_0_w0_untied, svuint8_t, -+ z0 = svread_hor_za128_u8_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** 
mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t, -+ z0 = svread_hor_za128_s16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t, -+ z0 = svread_hor_za128_s16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t, -+ z0 = svread_hor_za128_u16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t, -+ z0 = svread_hor_za128_u16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t, -+ z0 = svread_hor_za128_f16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov 
z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t, -+ z0 = svread_hor_za128_f16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_bf16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t, -+ z0 = svread_hor_za128_bf16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_bf16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t, -+ z0 = svread_hor_za128_bf16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t, -+ z0 = svread_hor_za128_s32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t, -+ z0 = svread_hor_za128_s32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t, -+ z0 = svread_hor_za128_u32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u32_0_w0_untied: -+** ( 
-+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t, -+ z0 = svread_hor_za128_u32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t, -+ z0 = svread_hor_za128_f32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t, -+ z0 = svread_hor_za128_f32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t, -+ z0 = svread_hor_za128_s64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s64_0_w0_untied, svint64_t, -+ z0 = svread_hor_za128_s64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u64_0_w0_tied: -+** mov (w1[2-5]), w0 
-+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t, -+ z0 = svread_hor_za128_u64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t, -+ z0 = svread_hor_za128_u64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t, -+ z0 = svread_hor_za128_f64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0h\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0h\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0h\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t, -+ z0 = svread_hor_za128_f64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za128_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c -new file mode 100644 -index 000000000..2e0a96591 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za16_s16_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_0_tied, 
svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, 0), -+ z0 = svread_hor_za16_m (z0, p0, 0, 0)) -+ -+/* -+** read_za16_s16_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, 1), -+ z0 = svread_hor_za16_m (z0, p0, 0, 1)) -+ -+/* -+** read_za16_s16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_s16_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 1), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za16_s16_0_w0p7_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 7\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 7), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 7)) -+ -+/* -+** read_za16_s16_0_w0p8_tied: -+** add (w1[2-5]), w0, #?8 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 + 8), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0 + 8)) -+ -+/* -+** read_za16_s16_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 0, w0 - 1), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za16_s16_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za1h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0), -+ z0 = svread_hor_za16_m (z0, p0, 1, w0)) -+ -+/* -+** read_za16_s16_1_w0p7_tied: -+** 
mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za1h\.h\[\1, 7\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z0, p0, 1, w0 + 7), -+ z0 = svread_hor_za16_m (z0, p0, 1, w0 + 7)) -+ -+/* -+** read_za16_s16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0h\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t, -+ z0 = svread_hor_za16_s16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_u16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t, -+ z0 = svread_hor_za16_u16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_u16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0h\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t, -+ z0 = svread_hor_za16_u16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_f16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t, -+ z0 = svread_hor_za16_f16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_f16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0h\.h\[\3, 0\] 
-+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t, -+ z0 = svread_hor_za16_f16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_bf16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t, -+ z0 = svread_hor_za16_bf16_m (z0, p0, 0, w0), -+ z0 = svread_hor_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_bf16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0h\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0h\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0h\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t, -+ z0 = svread_hor_za16_bf16_m (z1, p0, 0, w0), -+ z0 = svread_hor_za16_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c -new file mode 100644 -index 000000000..d111b60a7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za32.c -@@ -0,0 +1,196 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za32_s32_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, 0), -+ z0 = svread_hor_za32_m (z0, p0, 0, 0)) -+ -+/* -+** read_za32_s32_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, 1), -+ z0 = svread_hor_za32_m (z0, p0, 0, 1)) -+ -+/* -+** read_za32_s32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA 
(read_za32_s32_0_w0_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_s32_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 1), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za32_s32_0_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 3), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 3)) -+ -+/* -+** read_za32_s32_0_w0p4_tied: -+** add (w1[2-5]), w0, #?4 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 + 4), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0 + 4)) -+ -+/* -+** read_za32_s32_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 0, w0 - 1), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za32_s32_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za1h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0), -+ z0 = svread_hor_za32_m (z0, p0, 1, w0)) -+ -+/* -+** read_za32_s32_1_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za1h\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 1, w0 + 3), -+ z0 = svread_hor_za32_m (z0, p0, 1, w0 + 3)) -+ -+/* -+** read_za32_s32_3_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za3h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0), -+ z0 = svread_hor_za32_m (z0, p0, 3, w0)) -+ 
-+/* -+** read_za32_s32_3_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za3h\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z0, p0, 3, w0 + 3), -+ z0 = svread_hor_za32_m (z0, p0, 3, w0 + 3)) -+ -+/* -+** read_za32_s32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.s, p0/m, za0h\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t, -+ z0 = svread_hor_za32_s32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za32_m (z1, p0, 0, w0)) -+ -+/* -+** read_za32_u32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t, -+ z0 = svread_hor_za32_u32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_u32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.s, p0/m, za0h\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t, -+ z0 = svread_hor_za32_u32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za32_m (z1, p0, 0, w0)) -+ -+/* -+** read_za32_f32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t, -+ z0 = svread_hor_za32_f32_m (z0, p0, 0, w0), -+ z0 = svread_hor_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_f32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0h\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0h\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 
-+** mova z1\.s, p0/m, za0h\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t, -+ z0 = svread_hor_za32_f32_m (z1, p0, 0, w0), -+ z0 = svread_hor_za32_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c -new file mode 100644 -index 000000000..b75c531a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za64.c -@@ -0,0 +1,186 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za64_s64_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, 0), -+ z0 = svread_hor_za64_m (z0, p0, 0, 0)) -+ -+/* -+** read_za64_s64_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, 1), -+ z0 = svread_hor_za64_m (z0, p0, 0, 1)) -+ -+/* -+** read_za64_s64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za64_m (z0, p0, 0, w0)) -+ -+/* -+** read_za64_s64_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 1), -+ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za64_s64_0_w0p2_tied: -+** add (w1[2-5]), w0, #?2 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0p2_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 + 2), -+ z0 = svread_hor_za64_m (z0, p0, 0, w0 + 2)) -+ -+/* -+** read_za64_s64_0_w0m1_tied: -+** sub (w1[2-5]), 
w0, #?1 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 0, w0 - 1), -+ z0 = svread_hor_za64_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za64_s64_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za1h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0), -+ z0 = svread_hor_za64_m (z0, p0, 1, w0)) -+ -+/* -+** read_za64_s64_1_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za1h\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 1, w0 + 1), -+ z0 = svread_hor_za64_m (z0, p0, 1, w0 + 1)) -+ -+/* -+** read_za64_s64_7_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za7h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0), -+ z0 = svread_hor_za64_m (z0, p0, 7, w0)) -+ -+/* -+** read_za64_s64_7_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za7h\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z0, p0, 7, w0 + 1), -+ z0 = svread_hor_za64_m (z0, p0, 7, w0 + 1)) -+ -+/* -+** read_za64_s64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0h\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, -+ z0 = svread_hor_za64_s64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za64_m (z1, p0, 0, w0)) -+ -+/* -+** read_za64_u64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, -+ z0 = svread_hor_za64_u64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za64_m (z0, p0, 0, 
w0)) -+ -+/* -+** read_za64_u64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0h\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t, -+ z0 = svread_hor_za64_u64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za64_m (z1, p0, 0, w0)) -+ -+/* -+** read_za64_f64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t, -+ z0 = svread_hor_za64_f64_m (z0, p0, 0, w0), -+ z0 = svread_hor_za64_m (z0, p0, 0, w0)) -+ -+/* -+** read_za64_f64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0h\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0h\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0h\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t, -+ z0 = svread_hor_za64_f64_m (z1, p0, 0, w0), -+ z0 = svread_hor_za64_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c -new file mode 100644 -index 000000000..0ad5a953f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_hor_za8.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za8_s8_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, 0), -+ z0 = svread_hor_za8_m (z0, p0, 0, 0)) -+ -+/* -+** read_za8_s8_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ 
-+TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, 1), -+ z0 = svread_hor_za8_m (z0, p0, 0, 1)) -+ -+/* -+** read_za8_s8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0)) -+ -+/* -+** read_za8_s8_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0h\.b\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 1), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za8_s8_0_w0p15_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0h\.b\[\1, 15\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 15), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 15)) -+ -+/* -+** read_za8_s8_0_w0p16_tied: -+** add (w1[2-5]), w0, #?16 -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 + 16), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0 + 16)) -+ -+/* -+** read_za8_s8_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z0, p0, 0, w0 - 1), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za8_s8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0h\.b\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.b, p0/m, za0h\.b\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0_untied, svint8_t, -+ z0 = svread_hor_za8_s8_m (z1, p0, 0, w0), -+ z0 = svread_hor_za8_m (z1, p0, 0, w0)) -+ -+/* -+** read_za8_u8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, 
p0/m, za0h\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, -+ z0 = svread_hor_za8_u8_m (z0, p0, 0, w0), -+ z0 = svread_hor_za8_m (z0, p0, 0, w0)) -+ -+/* -+** read_za8_u8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.b, p0/m, za0h\.b\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0h\.b\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.b, p0/m, za0h\.b\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, -+ z0 = svread_hor_za8_u8_m (z1, p0, 0, w0), -+ z0 = svread_hor_za8_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c -new file mode 100644 -index 000000000..93d5d60ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za128.c -@@ -0,0 +1,435 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za128_s8_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_0_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 0, 0), -+ z0 = svread_ver_za128_m (z0, p0, 0, 0)) -+ -+/* -+** read_za128_s8_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_1_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 0, 1), -+ z0 = svread_ver_za128_m (z0, p0, 0, 1)) -+ -+/* -+** read_za128_s8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s8_0_w0p1_tied: -+** add (w1[2-5]), w0, #?1 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0p1_tied, svint8_t, -+ z0 
= svread_ver_za128_s8_m (z0, p0, 0, w0 + 1), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za128_s8_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0m1_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 0, w0 - 1), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za128_s8_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za1v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_1_w0_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 1, w0), -+ z0 = svread_ver_za128_m (z0, p0, 1, w0)) -+ -+/* -+** read_za128_s8_15_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za15v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_15_w0_tied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z0, p0, 15, w0), -+ z0 = svread_ver_za128_m (z0, p0, 15, w0)) -+ -+/* -+** read_za128_s8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s8_0_w0_untied, svint8_t, -+ z0 = svread_ver_za128_s8_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u8_0_w0_tied, svuint8_t, -+ z0 = svread_ver_za128_u8_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA 
(read_za128_u8_0_w0_untied, svuint8_t, -+ z0 = svread_ver_za128_u8_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s16_0_w0_tied, svint16_t, -+ z0 = svread_ver_za128_s16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s16_0_w0_untied, svint16_t, -+ z0 = svread_ver_za128_s16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u16_0_w0_tied, svuint16_t, -+ z0 = svread_ver_za128_u16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u16_0_w0_untied, svuint16_t, -+ z0 = svread_ver_za128_u16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f16_0_w0_tied, svfloat16_t, -+ z0 = svread_ver_za128_f16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, 
p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f16_0_w0_untied, svfloat16_t, -+ z0 = svread_ver_za128_f16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_bf16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_bf16_0_w0_tied, svbfloat16_t, -+ z0 = svread_ver_za128_bf16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_bf16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_bf16_0_w0_untied, svbfloat16_t, -+ z0 = svread_ver_za128_bf16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s32_0_w0_tied, svint32_t, -+ z0 = svread_ver_za128_s32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_s32_0_w0_untied, svint32_t, -+ z0 = svread_ver_za128_s32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ 
-+TEST_READ_ZA (read_za128_u32_0_w0_tied, svuint32_t, -+ z0 = svread_ver_za128_u32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u32_0_w0_untied, svuint32_t, -+ z0 = svread_ver_za128_u32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f32_0_w0_tied, svfloat32_t, -+ z0 = svread_ver_za128_f32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f32_0_w0_untied, svfloat32_t, -+ z0 = svread_ver_za128_f32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_s64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_s64_0_w0_tied, svint64_t, -+ z0 = svread_ver_za128_s64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_s64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA 
(read_za128_s64_0_w0_untied, svint64_t, -+ z0 = svread_ver_za128_s64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_u64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_u64_0_w0_tied, svuint64_t, -+ z0 = svread_ver_za128_u64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_u64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_u64_0_w0_untied, svuint64_t, -+ z0 = svread_ver_za128_u64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -+ -+/* -+** read_za128_f64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za128_f64_0_w0_tied, svfloat64_t, -+ z0 = svread_ver_za128_f64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za128_m (z0, p0, 0, w0)) -+ -+/* -+** read_za128_f64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.q, p0/m, za0v\.q\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.q, p0/m, za0v\.q\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.q, p0/m, za0v\.q\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za128_f64_0_w0_untied, svfloat64_t, -+ z0 = svread_ver_za128_f64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za128_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c -new file mode 100644 -index 000000000..d0353dce6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#include "test_sme_acle.h" -+ -+/* -+** read_za16_s16_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_0_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, 0), -+ z0 = svread_ver_za16_m (z0, p0, 0, 0)) -+ -+/* -+** read_za16_s16_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_1_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, 1), -+ z0 = svread_ver_za16_m (z0, p0, 0, 1)) -+ -+/* -+** read_za16_s16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_s16_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p1_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 1), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za16_s16_0_w0p7_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 7\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p7_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 7), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 7)) -+ -+/* -+** read_za16_s16_0_w0p8_tied: -+** add (w1[2-5]), w0, #?8 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0p8_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 + 8), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0 + 8)) -+ -+/* -+** read_za16_s16_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0m1_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 0, w0 - 1), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za16_s16_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za1v\.h\[\1, 0\] -+** ret 
-+*/ -+TEST_READ_ZA (read_za16_s16_1_w0_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 1, w0), -+ z0 = svread_ver_za16_m (z0, p0, 1, w0)) -+ -+/* -+** read_za16_s16_1_w0p7_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za1v\.h\[\1, 7\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_1_w0p7_tied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z0, p0, 1, w0 + 7), -+ z0 = svread_ver_za16_m (z0, p0, 1, w0 + 7)) -+ -+/* -+** read_za16_s16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0v\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_s16_0_w0_untied, svint16_t, -+ z0 = svread_ver_za16_s16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_u16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_u16_0_w0_tied, svuint16_t, -+ z0 = svread_ver_za16_u16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_u16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0v\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_u16_0_w0_untied, svuint16_t, -+ z0 = svread_ver_za16_u16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_f16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_f16_0_w0_tied, svfloat16_t, -+ z0 = svread_ver_za16_f16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_f16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova 
z0\.h, p0/m, za0v\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0v\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_f16_0_w0_untied, svfloat16_t, -+ z0 = svread_ver_za16_f16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za16_m (z1, p0, 0, w0)) -+ -+/* -+** read_za16_bf16_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za16_bf16_0_w0_tied, svbfloat16_t, -+ z0 = svread_ver_za16_bf16_m (z0, p0, 0, w0), -+ z0 = svread_ver_za16_m (z0, p0, 0, w0)) -+ -+/* -+** read_za16_bf16_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.h, p0/m, za0v\.h\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.h, p0/m, za0v\.h\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.h, p0/m, za0v\.h\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za16_bf16_0_w0_untied, svbfloat16_t, -+ z0 = svread_ver_za16_bf16_m (z1, p0, 0, w0), -+ z0 = svread_ver_za16_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c -new file mode 100644 -index 000000000..362e818ee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za32.c -@@ -0,0 +1,196 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za32_s32_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_0_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, 0), -+ z0 = svread_ver_za32_m (z0, p0, 0, 0)) -+ -+/* -+** read_za32_s32_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_1_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, 1), 
-+ z0 = svread_ver_za32_m (z0, p0, 0, 1)) -+ -+/* -+** read_za32_s32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_s32_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p1_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 1), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za32_s32_0_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p3_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 3), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 3)) -+ -+/* -+** read_za32_s32_0_w0p4_tied: -+** add (w1[2-5]), w0, #?4 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0p4_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 + 4), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0 + 4)) -+ -+/* -+** read_za32_s32_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0m1_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 0, w0 - 1), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za32_s32_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za1v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_1_w0_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0), -+ z0 = svread_ver_za32_m (z0, p0, 1, w0)) -+ -+/* -+** read_za32_s32_1_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za1v\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_1_w0p3_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 1, w0 + 3), -+ z0 = svread_ver_za32_m (z0, p0, 1, w0 + 3)) -+ -+/* -+** read_za32_s32_3_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, 
za3v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_3_w0_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0), -+ z0 = svread_ver_za32_m (z0, p0, 3, w0)) -+ -+/* -+** read_za32_s32_3_w0p3_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za3v\.s\[\1, 3\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_3_w0p3_tied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z0, p0, 3, w0 + 3), -+ z0 = svread_ver_za32_m (z0, p0, 3, w0 + 3)) -+ -+/* -+** read_za32_s32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.s, p0/m, za0v\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_s32_0_w0_untied, svint32_t, -+ z0 = svread_ver_za32_s32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za32_m (z1, p0, 0, w0)) -+ -+/* -+** read_za32_u32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_u32_0_w0_tied, svuint32_t, -+ z0 = svread_ver_za32_u32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_u32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.s, p0/m, za0v\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_u32_0_w0_untied, svuint32_t, -+ z0 = svread_ver_za32_u32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za32_m (z1, p0, 0, w0)) -+ -+/* -+** read_za32_f32_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za32_f32_0_w0_tied, svfloat32_t, -+ z0 = svread_ver_za32_f32_m (z0, p0, 0, w0), -+ z0 = svread_ver_za32_m (z0, p0, 0, w0)) -+ -+/* -+** read_za32_f32_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** 
mov z0\.d, z1\.d -+** mova z0\.s, p0/m, za0v\.s\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.s, p0/m, za0v\.s\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.s, p0/m, za0v\.s\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za32_f32_0_w0_untied, svfloat32_t, -+ z0 = svread_ver_za32_f32_m (z1, p0, 0, w0), -+ z0 = svread_ver_za32_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c -new file mode 100644 -index 000000000..dba3c6ffa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za64.c -@@ -0,0 +1,186 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za64_s64_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_0_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, 0), -+ z0 = svread_ver_za64_m (z0, p0, 0, 0)) -+ -+/* -+** read_za64_s64_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_1_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, 1), -+ z0 = svread_ver_za64_m (z0, p0, 0, 1)) -+ -+/* -+** read_za64_s64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0)) -+ -+/* -+** read_za64_s64_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0p1_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 1), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za64_s64_0_w0p2_tied: -+** add (w1[2-5]), w0, #?2 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA 
(read_za64_s64_0_w0p2_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 + 2), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0 + 2)) -+ -+/* -+** read_za64_s64_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0m1_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 0, w0 - 1), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za64_s64_1_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za1v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_1_w0_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0), -+ z0 = svread_ver_za64_m (z0, p0, 1, w0)) -+ -+/* -+** read_za64_s64_1_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za1v\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_1_w0p1_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 1, w0 + 1), -+ z0 = svread_ver_za64_m (z0, p0, 1, w0 + 1)) -+ -+/* -+** read_za64_s64_7_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za7v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_7_w0_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0), -+ z0 = svread_ver_za64_m (z0, p0, 7, w0)) -+ -+/* -+** read_za64_s64_7_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za7v\.d\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_7_w0p1_tied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z0, p0, 7, w0 + 1), -+ z0 = svread_ver_za64_m (z0, p0, 7, w0 + 1)) -+ -+/* -+** read_za64_s64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0v\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_s64_0_w0_untied, svint64_t, -+ z0 = svread_ver_za64_s64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za64_m (z1, p0, 0, w0)) -+ -+/* -+** read_za64_u64_0_w0_tied: -+** mov 
(w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_u64_0_w0_tied, svuint64_t, -+ z0 = svread_ver_za64_u64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0)) -+ -+/* -+** read_za64_u64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0v\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_u64_0_w0_untied, svuint64_t, -+ z0 = svread_ver_za64_u64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za64_m (z1, p0, 0, w0)) -+ -+/* -+** read_za64_f64_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za64_f64_0_w0_tied, svfloat64_t, -+ z0 = svread_ver_za64_f64_m (z0, p0, 0, w0), -+ z0 = svread_ver_za64_m (z0, p0, 0, w0)) -+ -+/* -+** read_za64_f64_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.d, p0/m, za0v\.d\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.d, p0/m, za0v\.d\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.d, p0/m, za0v\.d\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za64_f64_0_w0_untied, svfloat64_t, -+ z0 = svread_ver_za64_f64_m (z1, p0, 0, w0), -+ z0 = svread_ver_za64_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c -new file mode 100644 -index 000000000..87564d1fa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/read_ver_za8.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** read_za8_s8_0_0_tied: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_0_tied, svint8_t, -+ 
z0 = svread_ver_za8_s8_m (z0, p0, 0, 0), -+ z0 = svread_ver_za8_m (z0, p0, 0, 0)) -+ -+/* -+** read_za8_s8_0_1_tied: -+** mov (w1[2-5]), #?1 -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_1_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, 1), -+ z0 = svread_ver_za8_m (z0, p0, 0, 1)) -+ -+/* -+** read_za8_s8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0)) -+ -+/* -+** read_za8_s8_0_w0p1_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\1, 1\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p1_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 1), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 1)) -+ -+/* -+** read_za8_s8_0_w0p15_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\1, 15\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p15_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 15), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 15)) -+ -+/* -+** read_za8_s8_0_w0p16_tied: -+** add (w1[2-5]), w0, #?16 -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0p16_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 + 16), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0 + 16)) -+ -+/* -+** read_za8_s8_0_w0m1_tied: -+** sub (w1[2-5]), w0, #?1 -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_s8_0_w0m1_tied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z0, p0, 0, w0 - 1), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0 - 1)) -+ -+/* -+** read_za8_s8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.b, p0/m, za0v\.b\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA 
(read_za8_s8_0_w0_untied, svint8_t, -+ z0 = svread_ver_za8_s8_m (z1, p0, 0, w0), -+ z0 = svread_ver_za8_m (z1, p0, 0, w0)) -+ -+/* -+** read_za8_u8_0_w0_tied: -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** ret -+*/ -+TEST_READ_ZA (read_za8_u8_0_w0_tied, svuint8_t, -+ z0 = svread_ver_za8_u8_m (z0, p0, 0, w0), -+ z0 = svread_ver_za8_m (z0, p0, 0, w0)) -+ -+/* -+** read_za8_u8_0_w0_untied: -+** ( -+** mov (w1[2-5]), w0 -+** mov z0\.d, z1\.d -+** mova z0\.b, p0/m, za0v\.b\[\1, 0\] -+** | -+** mov z0\.d, z1\.d -+** mov (w1[2-5]), w0 -+** mova z0\.b, p0/m, za0v\.b\[\2, 0\] -+** | -+** mov (w1[2-5]), w0 -+** mova z1\.b, p0/m, za0v\.b\[\3, 0\] -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_READ_ZA (read_za8_u8_0_w0_untied, svuint8_t, -+ z0 = svread_ver_za8_u8_m (z1, p0, 0, w0), -+ z0 = svread_ver_za8_m (z1, p0, 0, w0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c -new file mode 100644 -index 000000000..057b6f21e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za128.c -@@ -0,0 +1,77 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za128_0_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_0_0_0, -+ svst1_hor_vnum_za128 (0, 0, p0, x1, 0), -+ svst1_hor_vnum_za128 (0, 0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_7_1_0: -+** mov (w1[2-5]), #?1 -+** st1q { za7h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_7_1_0, -+ svst1_hor_vnum_za128 (7, 1, p0, x1, 0), -+ svst1_hor_vnum_za128 (7, 1, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_11_1_5: -+** incb x1, all, mul #5 -+** mov (w1[2-5]), #?6 -+** st1q { za11h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_11_1_5, -+ svst1_hor_vnum_za128 (11, 1, p0, x1, 5), -+ svst1_hor_vnum_za128 
(11, 1, p0, x1, 5)) -+ -+/* -+** st1_vnum_za128_3_w0_0: -+** mov (w1[2-5]), w0 -+** st1q { za3h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_3_w0_0, -+ svst1_hor_vnum_za128 (3, w0, p0, x1, 0), -+ svst1_hor_vnum_za128 (3, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_5_w0_0: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1q { za5h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_5_w0_0, -+ svst1_hor_vnum_za128 (5, w0, p0, x1, 13), -+ svst1_hor_vnum_za128 (5, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za128_11_w0_0: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1q { za11h\.q\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_11_w0_0, -+ svst1_hor_vnum_za128 (11, w0, p0, x1, x2), -+ svst1_hor_vnum_za128 (11, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za128_15_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** st1q { za15h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0, -+ svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0), -+ svst1_hor_vnum_za128 (15, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c -new file mode 100644 -index 000000000..0b57dda0a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za16_1_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_0_1, -+ svst1_hor_vnum_za16 (1, 0, p0, x1, 1), -+ svst1_hor_vnum_za16 (1, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_1_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_1_1, -+ svst1_hor_vnum_za16 (1, 1, 
p0, x1, 1), -+ svst1_hor_vnum_za16 (1, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_0_0_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?8 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_0_8, -+ svst1_hor_vnum_za16 (0, 0, p0, x1, 8), -+ svst1_hor_vnum_za16 (0, 0, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_0_1_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?9 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_1_8, -+ svst1_hor_vnum_za16 (0, 1, p0, x1, 8), -+ svst1_hor_vnum_za16 (0, 1, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_0, -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 0), -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za16_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_1, -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 1), -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_0_w0_7: -+** incb x1, all, mul #7 -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_7, -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 7), -+ svst1_hor_vnum_za16 (0, w0, p0, x1, 7)) -+ -+/* -+** st1_vnum_za16_1_w0_8: -+** incb x1, all, mul #8 -+** add (w1[2-5]), w0, #?8 -+** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_w0_8, -+ svst1_hor_vnum_za16 (1, w0, p0, x1, 8), -+ svst1_hor_vnum_za16 (1, w0, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_1_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_w0_13, -+ svst1_hor_vnum_za16 (1, w0, p0, x1, 13), -+ svst1_hor_vnum_za16 (1, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za16_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add 
(w1[2-5]), (?:w0, w2|w2, w0) -+** st1h { za0h\.h\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_x2, -+ svst1_hor_vnum_za16 (0, w0, p0, x1, x2), -+ svst1_hor_vnum_za16 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za16_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_w0p1_0, -+ svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0), -+ svst1_hor_vnum_za16 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c -new file mode 100644 -index 000000000..d4381182f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za32_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1w { za3h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_3_0_1, -+ svst1_hor_vnum_za32 (3, 0, p0, x1, 1), -+ svst1_hor_vnum_za32 (3, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_2_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1w { za2h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_2_1_1, -+ svst1_hor_vnum_za32 (2, 1, p0, x1, 1), -+ svst1_hor_vnum_za32 (2, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_0_0_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?4 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_0_4, -+ svst1_hor_vnum_za32 (0, 0, p0, x1, 4), -+ svst1_hor_vnum_za32 (0, 0, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_2_1_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?5 -+** st1w { za2h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_2_1_4, -+ svst1_hor_vnum_za32 (2, 1, p0, x1, 4), -+ svst1_hor_vnum_za32 (2, 1, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 0\] }, p0, 
\[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_0, -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 0), -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za32_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_1, -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 1), -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_0_w0_3: -+** incb x1, all, mul #3 -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_3, -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 3), -+ svst1_hor_vnum_za32 (0, w0, p0, x1, 3)) -+ -+/* -+** st1_vnum_za32_1_w0_4: -+** incb x1, all, mul #4 -+** add (w1[2-5]), w0, #?4 -+** st1w { za1h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_1_w0_4, -+ svst1_hor_vnum_za32 (1, w0, p0, x1, 4), -+ svst1_hor_vnum_za32 (1, w0, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_3_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1w { za3h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_3_w0_13, -+ svst1_hor_vnum_za32 (3, w0, p0, x1, 13), -+ svst1_hor_vnum_za32 (3, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za32_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1w { za0h\.s\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_x2, -+ svst1_hor_vnum_za32 (0, w0, p0, x1, x2), -+ svst1_hor_vnum_za32 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za32_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1w { za1h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0, -+ svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0), -+ svst1_hor_vnum_za32 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c -new file mode 100644 -index 000000000..be6063712 ---- /dev/null 
-+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za64.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za64_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1d { za3h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_3_0_1, -+ svst1_hor_vnum_za64 (3, 0, p0, x1, 1), -+ svst1_hor_vnum_za64 (3, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_7_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1d { za7h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_7_1_1, -+ svst1_hor_vnum_za64 (7, 1, p0, x1, 1), -+ svst1_hor_vnum_za64 (7, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_0_0_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?2 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_0_2, -+ svst1_hor_vnum_za64 (0, 0, p0, x1, 2), -+ svst1_hor_vnum_za64 (0, 0, p0, x1, 2)) -+ -+/* -+** st1_vnum_za64_5_1_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?3 -+** st1d { za5h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_5_1_2, -+ svst1_hor_vnum_za64 (5, 1, p0, x1, 2), -+ svst1_hor_vnum_za64 (5, 1, p0, x1, 2)) -+ -+/* -+** st1_vnum_za64_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_w0_0, -+ svst1_hor_vnum_za64 (0, w0, p0, x1, 0), -+ svst1_hor_vnum_za64 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za64_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_w0_1, -+ svst1_hor_vnum_za64 (0, w0, p0, x1, 1), -+ svst1_hor_vnum_za64 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_6_w0_2: -+** incb x1, all, mul #2 -+** add (w1[2-5]), w0, #?2 -+** st1d { za6h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_6_w0_2, -+ svst1_hor_vnum_za64 (6, w0, p0, x1, 2), -+ svst1_hor_vnum_za64 (6, w0, p0, x1, 2)) -+ -+/* 
-+** st1_vnum_za64_2_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1d { za2h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_2_w0_13, -+ svst1_hor_vnum_za64 (2, w0, p0, x1, 13), -+ svst1_hor_vnum_za64 (2, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za64_4_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1d { za4h\.d\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_4_w0_x2, -+ svst1_hor_vnum_za64 (4, w0, p0, x1, x2), -+ svst1_hor_vnum_za64 (4, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za64_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1d { za1h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0, -+ svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0), -+ svst1_hor_vnum_za64 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c -new file mode 100644 -index 000000000..eed41d25e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_vnum_za8.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za8_0_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_0_1, -+ svst1_hor_vnum_za8 (0, 0, p0, x1, 1), -+ svst1_hor_vnum_za8 (0, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_1_1, -+ svst1_hor_vnum_za8 (0, 1, p0, x1, 1), -+ svst1_hor_vnum_za8 (0, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_0_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?16 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_0_16, -+ svst1_hor_vnum_za8 (0, 0, p0, x1, 16), -+ svst1_hor_vnum_za8 (0, 0, 
p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_1_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?17 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_1_16, -+ svst1_hor_vnum_za8 (0, 1, p0, x1, 16), -+ svst1_hor_vnum_za8 (0, 1, p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_0, -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 0), -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za8_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_1, -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 1), -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_w0_15: -+** incb x1, all, mul #15 -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 15\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_15, -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 15), -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 15)) -+ -+/* -+** st1_vnum_za8_0_w0_16: -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_16, -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 16), -+ svst1_hor_vnum_za8 (0, w0, p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_w0_x2: -+** cntb (x[0-9]+) -+** mul (x[0-9]+), (?:\1, x2|x2, \1) -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1b { za0h\.b\[\3, 0\] }, p0, \[x1, \2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_x2, -+ svst1_hor_vnum_za8 (0, w0, p0, x1, x2), -+ svst1_hor_vnum_za8 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za8_0_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0, -+ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0), -+ svst1_hor_vnum_za8 (0, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c -new file mode 100644 -index 000000000..5f3d613d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za128.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za128_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_0, -+ svst1_hor_za128 (0, 0, p0, x1), -+ svst1_hor_za128 (0, 0, p0, x1)) -+ -+/* -+** st1_za128_0_1: -+** mov (w1[2-5]), #?1 -+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_1, -+ svst1_hor_za128 (0, 1, p0, x1), -+ svst1_hor_za128 (0, 1, p0, x1)) -+ -+/* -+** st1_za128_0_w0: -+** mov (w1[2-5]), w0 -+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_w0, -+ svst1_hor_za128 (0, w0, p0, x1), -+ svst1_hor_za128 (0, w0, p0, x1)) -+ -+/* -+** st1_za128_0_w0_p1: -+** add (w1[2-5]), w0, #?1 -+** st1q { za0h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_w0_p1, -+ svst1_hor_za128 (0, w0 + 1, p0, x1), -+ svst1_hor_za128 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za128_7_w0: -+** mov (w1[2-5]), w0 -+** st1q { za7h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_7_w0, -+ svst1_hor_za128 (7, w0, p0, x1), -+ svst1_hor_za128 (7, w0, p0, x1)) -+ -+/* -+** st1_za128_13_w0: -+** mov (w1[2-5]), w0 -+** st1q { za13h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_13_w0, -+ svst1_hor_za128 (13, w0, p0, x1), -+ svst1_hor_za128 (13, w0, p0, x1)) -+ -+/* -+** st1_za128_15_w0: -+** mov (w1[2-5]), w0 -+** st1q { za15h\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_15_w0, -+ svst1_hor_za128 (15, w0, p0, x1), -+ svst1_hor_za128 (15, w0, p0, x1)) -+ -+/* -+** st1_za128_9_w0_index: -+** mov (w1[2-5]), w0 -+** st1q { za9h\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\] -+** ret -+*/ -+TEST_STORE_ZA 
(st1_za128_9_w0_index, -+ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16), -+ svst1_hor_za128 (9, w0, p0, x1 + x2 * 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c -new file mode 100644 -index 000000000..206306b23 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za16_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_0, -+ svst1_hor_za16 (0, 0, p0, x1), -+ svst1_hor_za16 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 7. */ -+/* -+** st1_za16_0_7: -+** mov (w1[2-5]), #?7 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_7, -+ svst1_hor_za16 (0, 7, p0, x1), -+ svst1_hor_za16 (0, 7, p0, x1)) -+ -+/* -+** st1_za16_0_8: -+** mov (w1[2-5]), #?8 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_8, -+ svst1_hor_za16 (0, 8, p0, x1), -+ svst1_hor_za16 (0, 8, p0, x1)) -+ -+/* -+** st1_za16_0_w0: -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0, -+ svst1_hor_za16 (0, w0, p0, x1), -+ svst1_hor_za16 (0, w0, p0, x1)) -+ -+/* -+** st1_za16_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p1, -+ svst1_hor_za16 (0, w0 + 1, p0, x1), -+ svst1_hor_za16 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za16_0_w0_p7: -+** mov (w1[2-5]), w0 -+** st1h { za0h\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p7, -+ svst1_hor_za16 (0, w0 + 7, p0, x1), -+ svst1_hor_za16 (0, w0 + 7, p0, x1)) -+ -+/* -+** st1_za16_1_w0: -+** mov (w1[2-5]), w0 -+** st1h { za1h\.h\[\1, 0\] }, p0, \[x1\] -+** ret 
-+*/ -+TEST_STORE_ZA (st1_za16_1_w0, -+ svst1_hor_za16 (1, w0, p0, x1), -+ svst1_hor_za16 (1, w0, p0, x1)) -+ -+ -+/* -+** st1_za16_1_w0_p1: -+** mov (w1[2-5]), w0 -+** st1h { za1h\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p1, -+ svst1_hor_za16 (1, w0 + 1, p0, x1), -+ svst1_hor_za16 (1, w0 + 1, p0, x1)) -+ -+/* -+** st1_za16_1_w0_p7: -+** mov (w1[2-5]), w0 -+** st1h { za1h\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p7, -+ svst1_hor_za16 (1, w0 + 7, p0, x1), -+ svst1_hor_za16 (1, w0 + 7, p0, x1)) -+ -+/* -+** st1_za16_1_w0_p5_index: -+** mov (w1[2-5]), w0 -+** st1h { za1h\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p5_index, -+ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2), -+ svst1_hor_za16 (1, w0 + 5, p0, x1 + x2 * 2)) -+ -+/* -+** st1_za16_0_w0_p8: -+** add (w1[2-5]), w0, #?8 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p8, -+ svst1_hor_za16 (0, w0 + 8, p0, x1), -+ svst1_hor_za16 (0, w0 + 8, p0, x1)) -+ -+/* -+** st1_za16_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1h { za0h\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_m1, -+ svst1_hor_za16 (0, w0 - 1, p0, x1), -+ svst1_hor_za16 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c -new file mode 100644 -index 000000000..ed9b2b2e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za32.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za32_0_0: -+** mov (w1[2-5]), (?:w0|#?0) -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_0, -+ svst1_hor_za32 (0, 0, p0, x1), -+ svst1_hor_za32 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 3. 
*/ -+/* -+** st1_za32_0_3: -+** mov (w1[2-5]), #?3 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_3, -+ svst1_hor_za32 (0, 3, p0, x1), -+ svst1_hor_za32 (0, 3, p0, x1)) -+ -+/* -+** st1_za32_0_4: -+** mov (w1[2-5]), #?4 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_4, -+ svst1_hor_za32 (0, 4, p0, x1), -+ svst1_hor_za32 (0, 4, p0, x1)) -+ -+/* -+** st1_za32_0_w0: -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0, -+ svst1_hor_za32 (0, w0, p0, x1), -+ svst1_hor_za32 (0, w0, p0, x1)) -+ -+/* -+** st1_za32_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p1, -+ svst1_hor_za32 (0, w0 + 1, p0, x1), -+ svst1_hor_za32 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za32_0_w0_p3: -+** mov (w1[2-5]), w0 -+** st1w { za0h\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p3, -+ svst1_hor_za32 (0, w0 + 3, p0, x1), -+ svst1_hor_za32 (0, w0 + 3, p0, x1)) -+ -+/* -+** st1_za32_3_w0: -+** mov (w1[2-5]), w0 -+** st1w { za3h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0, -+ svst1_hor_za32 (3, w0, p0, x1), -+ svst1_hor_za32 (3, w0, p0, x1)) -+ -+/* -+** st1_za32_3_w0_p1: -+** mov (w1[2-5]), w0 -+** st1w { za3h\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0_p1, -+ svst1_hor_za32 (3, w0 + 1, p0, x1), -+ svst1_hor_za32 (3, w0 + 1, p0, x1)) -+ -+/* -+** st1_za32_3_w0_p3: -+** mov (w1[2-5]), w0 -+** st1w { za3h\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0_p3, -+ svst1_hor_za32 (3, w0 + 3, p0, x1), -+ svst1_hor_za32 (3, w0 + 3, p0, x1)) -+ -+/* -+** st1_za32_1_w0_p2_index: -+** mov (w1[2-5]), w0 -+** st1w { za1h\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_1_w0_p2_index, -+ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4), -+ svst1_hor_za32 (1, w0 + 2, p0, x1 + x2 * 4)) -+ 
-+/* -+** st1_za32_0_w0_p4: -+** add (w1[2-5]), w0, #?4 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p4, -+ svst1_hor_za32 (0, w0 + 4, p0, x1), -+ svst1_hor_za32 (0, w0 + 4, p0, x1)) -+ -+/* -+** st1_za32_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1w { za0h\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_m1, -+ svst1_hor_za32 (0, w0 - 1, p0, x1), -+ svst1_hor_za32 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c -new file mode 100644 -index 000000000..3600f5b8f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za64.c -@@ -0,0 +1,105 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za64_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_0, -+ svst1_hor_za64 (0, 0, p0, x1), -+ svst1_hor_za64 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 1. 
*/ -+/* -+** st1_za64_0_1: -+** mov (w1[2-5]), #?1 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_1, -+ svst1_hor_za64 (0, 1, p0, x1), -+ svst1_hor_za64 (0, 1, p0, x1)) -+ -+/* -+** st1_za64_0_2: -+** mov (w1[2-5]), #?2 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_2, -+ svst1_hor_za64 (0, 2, p0, x1), -+ svst1_hor_za64 (0, 2, p0, x1)) -+ -+/* -+** st1_za64_0_w0: -+** mov (w1[2-5]), w0 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0, -+ svst1_hor_za64 (0, w0, p0, x1), -+ svst1_hor_za64 (0, w0, p0, x1)) -+ -+/* -+** st1_za64_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1d { za0h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_p1, -+ svst1_hor_za64 (0, w0 + 1, p0, x1), -+ svst1_hor_za64 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za64_7_w0: -+** mov (w1[2-5]), w0 -+** st1d { za7h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_7_w0, -+ svst1_hor_za64 (7, w0, p0, x1), -+ svst1_hor_za64 (7, w0, p0, x1)) -+ -+/* -+** st1_za64_7_w0_p1: -+** mov (w1[2-5]), w0 -+** st1d { za7h\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_7_w0_p1, -+ svst1_hor_za64 (7, w0 + 1, p0, x1), -+ svst1_hor_za64 (7, w0 + 1, p0, x1)) -+ -+/* -+** st1_za64_5_w0_p1_index: -+** mov (w1[2-5]), w0 -+** st1d { za5h\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_5_w0_p1_index, -+ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8), -+ svst1_hor_za64 (5, w0 + 1, p0, x1 + x2 * 8)) -+ -+/* -+** st1_za64_0_w0_p2: -+** add (w1[2-5]), w0, #?2 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_p2, -+ svst1_hor_za64 (0, w0 + 2, p0, x1), -+ svst1_hor_za64 (0, w0 + 2, p0, x1)) -+ -+/* -+** st1_za64_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1d { za0h\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_m1, -+ svst1_hor_za64 (0, w0 - 1, p0, x1), -+ svst1_hor_za64 (0, w0 - 1, 
p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c -new file mode 100644 -index 000000000..9026fae9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_hor_za8.c -@@ -0,0 +1,95 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za8_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_0, -+ svst1_hor_za8 (0, 0, p0, x1), -+ svst1_hor_za8 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 15. */ -+/* -+** st1_za8_0_15: -+** mov (w1[2-5]), #?15 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_15, -+ svst1_hor_za8 (0, 15, p0, x1), -+ svst1_hor_za8 (0, 15, p0, x1)) -+ -+/* -+** st1_za8_0_16: -+** mov (w1[2-5]), #?16 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_16, -+ svst1_hor_za8 (0, 16, p0, x1), -+ svst1_hor_za8 (0, 16, p0, x1)) -+ -+/* -+** st1_za8_0_w0: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0, -+ svst1_hor_za8 (0, w0, p0, x1), -+ svst1_hor_za8 (0, w0, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p1, -+ svst1_hor_za8 (0, w0 + 1, p0, x1), -+ svst1_hor_za8 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p15: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 15\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p15, -+ svst1_hor_za8 (0, w0 + 15, p0, x1), -+ svst1_hor_za8 (0, w0 + 15, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p13_index: -+** mov (w1[2-5]), w0 -+** st1b { za0h\.b\[\1, 15\] }, p0, \[x1, x2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p13_index, -+ svst1_hor_za8 (0, w0 + 15, p0, x1 + x2), -+ 
svst1_hor_za8 (0, w0 + 15, p0, x1 + x2)) -+ -+/* -+** st1_za8_0_w0_p16: -+** add (w1[2-5]), w0, #?16 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p16, -+ svst1_hor_za8 (0, w0 + 16, p0, x1), -+ svst1_hor_za8 (0, w0 + 16, p0, x1)) -+ -+/* -+** st1_za8_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1b { za0h\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_m1, -+ svst1_hor_za8 (0, w0 - 1, p0, x1), -+ svst1_hor_za8 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c -new file mode 100644 -index 000000000..210687a48 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za128.c -@@ -0,0 +1,77 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za128_0_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_0_0_0, -+ svst1_ver_vnum_za128 (0, 0, p0, x1, 0), -+ svst1_ver_vnum_za128 (0, 0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_7_1_0: -+** mov (w1[2-5]), #?1 -+** st1q { za7v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_7_1_0, -+ svst1_ver_vnum_za128 (7, 1, p0, x1, 0), -+ svst1_ver_vnum_za128 (7, 1, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_11_1_5: -+** incb x1, all, mul #5 -+** mov (w1[2-5]), #?6 -+** st1q { za11v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_11_1_5, -+ svst1_ver_vnum_za128 (11, 1, p0, x1, 5), -+ svst1_ver_vnum_za128 (11, 1, p0, x1, 5)) -+ -+/* -+** st1_vnum_za128_3_w0_0: -+** mov (w1[2-5]), w0 -+** st1q { za3v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_3_w0_0, -+ svst1_ver_vnum_za128 (3, w0, p0, x1, 0), -+ svst1_ver_vnum_za128 (3, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za128_5_w0_0: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, 
#?13 -+** st1q { za5v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_5_w0_0, -+ svst1_ver_vnum_za128 (5, w0, p0, x1, 13), -+ svst1_ver_vnum_za128 (5, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za128_11_w0_0: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1q { za11v\.q\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_11_w0_0, -+ svst1_ver_vnum_za128 (11, w0, p0, x1, x2), -+ svst1_ver_vnum_za128 (11, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za128_15_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za128_15_w0p1_0, -+ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0), -+ svst1_ver_vnum_za128 (15, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c -new file mode 100644 -index 000000000..f75a22402 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za16_1_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_0_1, -+ svst1_ver_vnum_za16 (1, 0, p0, x1, 1), -+ svst1_ver_vnum_za16 (1, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_1_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_1_1, -+ svst1_ver_vnum_za16 (1, 1, p0, x1, 1), -+ svst1_ver_vnum_za16 (1, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_0_0_8: -+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?8 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_0_8, -+ svst1_ver_vnum_za16 (0, 0, p0, x1, 8), -+ svst1_ver_vnum_za16 (0, 0, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_0_1_8: 
-+** incb x1, all, mul #8 -+** mov (w1[2-5]), #?9 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_1_8, -+ svst1_ver_vnum_za16 (0, 1, p0, x1, 8), -+ svst1_ver_vnum_za16 (0, 1, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_0, -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 0), -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za16_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_1, -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 1), -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za16_0_w0_7: -+** incb x1, all, mul #7 -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_7, -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 7), -+ svst1_ver_vnum_za16 (0, w0, p0, x1, 7)) -+ -+/* -+** st1_vnum_za16_1_w0_8: -+** incb x1, all, mul #8 -+** add (w1[2-5]), w0, #?8 -+** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_w0_8, -+ svst1_ver_vnum_za16 (1, w0, p0, x1, 8), -+ svst1_ver_vnum_za16 (1, w0, p0, x1, 8)) -+ -+/* -+** st1_vnum_za16_1_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_1_w0_13, -+ svst1_ver_vnum_za16 (1, w0, p0, x1, 13), -+ svst1_ver_vnum_za16 (1, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za16_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1h { za0v\.h\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za16_0_w0_x2, -+ svst1_ver_vnum_za16 (0, w0, p0, x1, x2), -+ svst1_ver_vnum_za16 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za16_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA 
(st1_vnum_za16_1_w0p1_0, -+ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0), -+ svst1_ver_vnum_za16 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c -new file mode 100644 -index 000000000..45db67a9f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za32_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_3_0_1, -+ svst1_ver_vnum_za32 (3, 0, p0, x1, 1), -+ svst1_ver_vnum_za32 (3, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_2_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1w { za2v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_2_1_1, -+ svst1_ver_vnum_za32 (2, 1, p0, x1, 1), -+ svst1_ver_vnum_za32 (2, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_0_0_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?4 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_0_4, -+ svst1_ver_vnum_za32 (0, 0, p0, x1, 4), -+ svst1_ver_vnum_za32 (0, 0, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_2_1_4: -+** incb x1, all, mul #4 -+** mov (w1[2-5]), #?5 -+** st1w { za2v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_2_1_4, -+ svst1_ver_vnum_za32 (2, 1, p0, x1, 4), -+ svst1_ver_vnum_za32 (2, 1, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_0, -+ svst1_ver_vnum_za32 (0, w0, p0, x1, 0), -+ svst1_ver_vnum_za32 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za32_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_1, -+ svst1_ver_vnum_za32 (0, w0, 
p0, x1, 1), -+ svst1_ver_vnum_za32 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za32_0_w0_3: -+** incb x1, all, mul #3 -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_3, -+ svst1_ver_vnum_za32 (0, w0, p0, x1, 3), -+ svst1_ver_vnum_za32 (0, w0, p0, x1, 3)) -+ -+/* -+** st1_vnum_za32_1_w0_4: -+** incb x1, all, mul #4 -+** add (w1[2-5]), w0, #?4 -+** st1w { za1v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_1_w0_4, -+ svst1_ver_vnum_za32 (1, w0, p0, x1, 4), -+ svst1_ver_vnum_za32 (1, w0, p0, x1, 4)) -+ -+/* -+** st1_vnum_za32_3_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_3_w0_13, -+ svst1_ver_vnum_za32 (3, w0, p0, x1, 13), -+ svst1_ver_vnum_za32 (3, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za32_0_w0_x2: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1w { za0v\.s\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_0_w0_x2, -+ svst1_ver_vnum_za32 (0, w0, p0, x1, x2), -+ svst1_ver_vnum_za32 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za32_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1w { za1v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za32_1_w0p1_0, -+ svst1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0), -+ svst1_ver_vnum_za32 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c -new file mode 100644 -index 000000000..bd061fc61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za64.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za64_3_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1d { za3v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ 
-+TEST_STORE_ZA (st1_vnum_za64_3_0_1, -+ svst1_ver_vnum_za64 (3, 0, p0, x1, 1), -+ svst1_ver_vnum_za64 (3, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_7_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_7_1_1, -+ svst1_ver_vnum_za64 (7, 1, p0, x1, 1), -+ svst1_ver_vnum_za64 (7, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_0_0_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?2 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_0_2, -+ svst1_ver_vnum_za64 (0, 0, p0, x1, 2), -+ svst1_ver_vnum_za64 (0, 0, p0, x1, 2)) -+ -+/* -+** st1_vnum_za64_5_1_2: -+** incb x1, all, mul #2 -+** mov (w1[2-5]), #?3 -+** st1d { za5v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_5_1_2, -+ svst1_ver_vnum_za64 (5, 1, p0, x1, 2), -+ svst1_ver_vnum_za64 (5, 1, p0, x1, 2)) -+ -+/* -+** st1_vnum_za64_0_w0_0: -+** mov (w1[2-5]), w0 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_w0_0, -+ svst1_ver_vnum_za64 (0, w0, p0, x1, 0), -+ svst1_ver_vnum_za64 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za64_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_0_w0_1, -+ svst1_ver_vnum_za64 (0, w0, p0, x1, 1), -+ svst1_ver_vnum_za64 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za64_6_w0_2: -+** incb x1, all, mul #2 -+** add (w1[2-5]), w0, #?2 -+** st1d { za6v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_6_w0_2, -+ svst1_ver_vnum_za64 (6, w0, p0, x1, 2), -+ svst1_ver_vnum_za64 (6, w0, p0, x1, 2)) -+ -+/* -+** st1_vnum_za64_2_w0_13: -+** incb x1, all, mul #13 -+** add (w1[2-5]), w0, #?13 -+** st1d { za2v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_2_w0_13, -+ svst1_ver_vnum_za64 (2, w0, p0, x1, 13), -+ svst1_ver_vnum_za64 (2, w0, p0, x1, 13)) -+ -+/* -+** st1_vnum_za64_4_w0_x2: -+** cntb (x[0-9]+) -+** 
madd (x[0-9]+), (?:\1, x2|x2, \1), x1 -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1d { za4v\.d\[\3, 0\] }, p0, \[\2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_4_w0_x2, -+ svst1_ver_vnum_za64 (4, w0, p0, x1, x2), -+ svst1_ver_vnum_za64 (4, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za64_1_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1d { za1v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za64_1_w0p1_0, -+ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0), -+ svst1_ver_vnum_za64 (1, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c -new file mode 100644 -index 000000000..b15a7eb08 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_vnum_za8.c -@@ -0,0 +1,112 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_vnum_za8_0_0_1: -+** incb x1 -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_0_1, -+ svst1_ver_vnum_za8 (0, 0, p0, x1, 1), -+ svst1_ver_vnum_za8 (0, 0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_1_1: -+** incb x1 -+** mov (w1[2-5]), #?1 -+** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_1_1, -+ svst1_ver_vnum_za8 (0, 1, p0, x1, 1), -+ svst1_ver_vnum_za8 (0, 1, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_0_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?16 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_0_16, -+ svst1_ver_vnum_za8 (0, 0, p0, x1, 16), -+ svst1_ver_vnum_za8 (0, 0, p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_1_16: -+** incb x1, all, mul #16 -+** mov (w1[2-5]), #?17 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_1_16, -+ svst1_ver_vnum_za8 (0, 1, p0, x1, 16), -+ svst1_ver_vnum_za8 (0, 1, p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_w0_0: -+** mov (w1[2-5]), 
w0 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_0, -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 0), -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 0)) -+ -+/* -+** st1_vnum_za8_0_w0_1: -+** incb x1 -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_1, -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 1), -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 1)) -+ -+/* -+** st1_vnum_za8_0_w0_15: -+** incb x1, all, mul #15 -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_15, -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 15), -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 15)) -+ -+/* -+** st1_vnum_za8_0_w0_16: -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_16, -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 16), -+ svst1_ver_vnum_za8 (0, w0, p0, x1, 16)) -+ -+/* -+** st1_vnum_za8_0_w0_x2: -+** cntb (x[0-9]+) -+** mul (x[0-9]+), (?:\1, x2|x2, \1) -+** add (w1[2-5]), (?:w0, w2|w2, w0) -+** st1b { za0v\.b\[\3, 0\] }, p0, \[x1, \2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0_x2, -+ svst1_ver_vnum_za8 (0, w0, p0, x1, x2), -+ svst1_ver_vnum_za8 (0, w0, p0, x1, x2)) -+ -+/* -+** st1_vnum_za8_0_w0p1_0: -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_vnum_za8_0_w0p1_0, -+ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0), -+ svst1_ver_vnum_za8 (0, w0 + 1, p0, x1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c -new file mode 100644 -index 000000000..7be6d5a5f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za128.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za128_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1q 
{ za0v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_0, -+ svst1_ver_za128 (0, 0, p0, x1), -+ svst1_ver_za128 (0, 0, p0, x1)) -+ -+/* -+** st1_za128_0_1: -+** mov (w1[2-5]), #?1 -+** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_1, -+ svst1_ver_za128 (0, 1, p0, x1), -+ svst1_ver_za128 (0, 1, p0, x1)) -+ -+/* -+** st1_za128_0_w0: -+** mov (w1[2-5]), w0 -+** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_w0, -+ svst1_ver_za128 (0, w0, p0, x1), -+ svst1_ver_za128 (0, w0, p0, x1)) -+ -+/* -+** st1_za128_0_w0_p1: -+** add (w1[2-5]), w0, #?1 -+** st1q { za0v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_0_w0_p1, -+ svst1_ver_za128 (0, w0 + 1, p0, x1), -+ svst1_ver_za128 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za128_7_w0: -+** mov (w1[2-5]), w0 -+** st1q { za7v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_7_w0, -+ svst1_ver_za128 (7, w0, p0, x1), -+ svst1_ver_za128 (7, w0, p0, x1)) -+ -+/* -+** st1_za128_13_w0: -+** mov (w1[2-5]), w0 -+** st1q { za13v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_13_w0, -+ svst1_ver_za128 (13, w0, p0, x1), -+ svst1_ver_za128 (13, w0, p0, x1)) -+ -+/* -+** st1_za128_15_w0: -+** mov (w1[2-5]), w0 -+** st1q { za15v\.q\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_15_w0, -+ svst1_ver_za128 (15, w0, p0, x1), -+ svst1_ver_za128 (15, w0, p0, x1)) -+ -+/* -+** st1_za128_9_w0_index: -+** mov (w1[2-5]), w0 -+** st1q { za9v\.q\[\1, 0\] }, p0, \[x1, x2, lsl #?4\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za128_9_w0_index, -+ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16), -+ svst1_ver_za128 (9, w0, p0, x1 + x2 * 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c -new file mode 100644 -index 000000000..1bbf12a14 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za16.c -@@ -0,0 
+1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za16_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_0, -+ svst1_ver_za16 (0, 0, p0, x1), -+ svst1_ver_za16 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 7. */ -+/* -+** st1_za16_0_7: -+** mov (w1[2-5]), #?7 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_7, -+ svst1_ver_za16 (0, 7, p0, x1), -+ svst1_ver_za16 (0, 7, p0, x1)) -+ -+/* -+** st1_za16_0_8: -+** mov (w1[2-5]), #?8 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_8, -+ svst1_ver_za16 (0, 8, p0, x1), -+ svst1_ver_za16 (0, 8, p0, x1)) -+ -+/* -+** st1_za16_0_w0: -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0, -+ svst1_ver_za16 (0, w0, p0, x1), -+ svst1_ver_za16 (0, w0, p0, x1)) -+ -+/* -+** st1_za16_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p1, -+ svst1_ver_za16 (0, w0 + 1, p0, x1), -+ svst1_ver_za16 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za16_0_w0_p7: -+** mov (w1[2-5]), w0 -+** st1h { za0v\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p7, -+ svst1_ver_za16 (0, w0 + 7, p0, x1), -+ svst1_ver_za16 (0, w0 + 7, p0, x1)) -+ -+/* -+** st1_za16_1_w0: -+** mov (w1[2-5]), w0 -+** st1h { za1v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0, -+ svst1_ver_za16 (1, w0, p0, x1), -+ svst1_ver_za16 (1, w0, p0, x1)) -+ -+ -+/* -+** st1_za16_1_w0_p1: -+** mov (w1[2-5]), w0 -+** st1h { za1v\.h\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p1, -+ svst1_ver_za16 (1, w0 + 1, p0, x1), -+ svst1_ver_za16 (1, w0 + 1, p0, x1)) -+ -+/* -+** st1_za16_1_w0_p7: -+** mov (w1[2-5]), w0 -+** 
st1h { za1v\.h\[\1, 7\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p7, -+ svst1_ver_za16 (1, w0 + 7, p0, x1), -+ svst1_ver_za16 (1, w0 + 7, p0, x1)) -+ -+/* -+** st1_za16_1_w0_p5_index: -+** mov (w1[2-5]), w0 -+** st1h { za1v\.h\[\1, 5\] }, p0, \[x1, x2, lsl #?1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_1_w0_p5_index, -+ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2), -+ svst1_ver_za16 (1, w0 + 5, p0, x1 + x2 * 2)) -+ -+/* -+** st1_za16_0_w0_p8: -+** add (w1[2-5]), w0, #?8 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_p8, -+ svst1_ver_za16 (0, w0 + 8, p0, x1), -+ svst1_ver_za16 (0, w0 + 8, p0, x1)) -+ -+/* -+** st1_za16_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1h { za0v\.h\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za16_0_w0_m1, -+ svst1_ver_za16 (0, w0 - 1, p0, x1), -+ svst1_ver_za16 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c -new file mode 100644 -index 000000000..9809e9708 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za32.c -@@ -0,0 +1,125 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za32_0_0: -+** mov (w1[2-5]), (?:w0|#?0) -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_0, -+ svst1_ver_za32 (0, 0, p0, x1), -+ svst1_ver_za32 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 3. 
*/ -+/* -+** st1_za32_0_3: -+** mov (w1[2-5]), #?3 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_3, -+ svst1_ver_za32 (0, 3, p0, x1), -+ svst1_ver_za32 (0, 3, p0, x1)) -+ -+/* -+** st1_za32_0_4: -+** mov (w1[2-5]), #?4 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_4, -+ svst1_ver_za32 (0, 4, p0, x1), -+ svst1_ver_za32 (0, 4, p0, x1)) -+ -+/* -+** st1_za32_0_w0: -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0, -+ svst1_ver_za32 (0, w0, p0, x1), -+ svst1_ver_za32 (0, w0, p0, x1)) -+ -+/* -+** st1_za32_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p1, -+ svst1_ver_za32 (0, w0 + 1, p0, x1), -+ svst1_ver_za32 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za32_0_w0_p3: -+** mov (w1[2-5]), w0 -+** st1w { za0v\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p3, -+ svst1_ver_za32 (0, w0 + 3, p0, x1), -+ svst1_ver_za32 (0, w0 + 3, p0, x1)) -+ -+/* -+** st1_za32_3_w0: -+** mov (w1[2-5]), w0 -+** st1w { za3v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0, -+ svst1_ver_za32 (3, w0, p0, x1), -+ svst1_ver_za32 (3, w0, p0, x1)) -+ -+/* -+** st1_za32_3_w0_p1: -+** mov (w1[2-5]), w0 -+** st1w { za3v\.s\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0_p1, -+ svst1_ver_za32 (3, w0 + 1, p0, x1), -+ svst1_ver_za32 (3, w0 + 1, p0, x1)) -+ -+/* -+** st1_za32_3_w0_p3: -+** mov (w1[2-5]), w0 -+** st1w { za3v\.s\[\1, 3\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_3_w0_p3, -+ svst1_ver_za32 (3, w0 + 3, p0, x1), -+ svst1_ver_za32 (3, w0 + 3, p0, x1)) -+ -+/* -+** st1_za32_1_w0_p2_index: -+** mov (w1[2-5]), w0 -+** st1w { za1v\.s\[\1, 2\] }, p0, \[x1, x2, lsl #?2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_1_w0_p2_index, -+ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4), -+ svst1_ver_za32 (1, w0 + 2, p0, x1 + x2 * 4)) -+ 
-+/* -+** st1_za32_0_w0_p4: -+** add (w1[2-5]), w0, #?4 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_p4, -+ svst1_ver_za32 (0, w0 + 4, p0, x1), -+ svst1_ver_za32 (0, w0 + 4, p0, x1)) -+ -+/* -+** st1_za32_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1w { za0v\.s\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za32_0_w0_m1, -+ svst1_ver_za32 (0, w0 - 1, p0, x1), -+ svst1_ver_za32 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c -new file mode 100644 -index 000000000..0e93f4da3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za64.c -@@ -0,0 +1,105 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za64_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_0, -+ svst1_ver_za64 (0, 0, p0, x1), -+ svst1_ver_za64 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 1. 
*/ -+/* -+** st1_za64_0_1: -+** mov (w1[2-5]), #?1 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_1, -+ svst1_ver_za64 (0, 1, p0, x1), -+ svst1_ver_za64 (0, 1, p0, x1)) -+ -+/* -+** st1_za64_0_2: -+** mov (w1[2-5]), #?2 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_2, -+ svst1_ver_za64 (0, 2, p0, x1), -+ svst1_ver_za64 (0, 2, p0, x1)) -+ -+/* -+** st1_za64_0_w0: -+** mov (w1[2-5]), w0 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0, -+ svst1_ver_za64 (0, w0, p0, x1), -+ svst1_ver_za64 (0, w0, p0, x1)) -+ -+/* -+** st1_za64_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1d { za0v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_p1, -+ svst1_ver_za64 (0, w0 + 1, p0, x1), -+ svst1_ver_za64 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za64_7_w0: -+** mov (w1[2-5]), w0 -+** st1d { za7v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_7_w0, -+ svst1_ver_za64 (7, w0, p0, x1), -+ svst1_ver_za64 (7, w0, p0, x1)) -+ -+/* -+** st1_za64_7_w0_p1: -+** mov (w1[2-5]), w0 -+** st1d { za7v\.d\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_7_w0_p1, -+ svst1_ver_za64 (7, w0 + 1, p0, x1), -+ svst1_ver_za64 (7, w0 + 1, p0, x1)) -+ -+/* -+** st1_za64_5_w0_p1_index: -+** mov (w1[2-5]), w0 -+** st1d { za5v\.d\[\1, 1\] }, p0, \[x1, x2, lsl #?3\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_5_w0_p1_index, -+ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8), -+ svst1_ver_za64 (5, w0 + 1, p0, x1 + x2 * 8)) -+ -+/* -+** st1_za64_0_w0_p2: -+** add (w1[2-5]), w0, #?2 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_p2, -+ svst1_ver_za64 (0, w0 + 2, p0, x1), -+ svst1_ver_za64 (0, w0 + 2, p0, x1)) -+ -+/* -+** st1_za64_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1d { za0v\.d\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za64_0_w0_m1, -+ svst1_ver_za64 (0, w0 - 1, p0, x1), -+ svst1_ver_za64 (0, w0 - 1, 
p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c -new file mode 100644 -index 000000000..c76b5c28b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/st1_ver_za8.c -@@ -0,0 +1,95 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** st1_za8_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_0, -+ svst1_ver_za8 (0, 0, p0, x1), -+ svst1_ver_za8 (0, 0, p0, x1)) -+ -+/* It would also be OK (and perhaps better) to move 0 into a register -+ and use an offset of 15. */ -+/* -+** st1_za8_0_15: -+** mov (w1[2-5]), #?15 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_15, -+ svst1_ver_za8 (0, 15, p0, x1), -+ svst1_ver_za8 (0, 15, p0, x1)) -+ -+/* -+** st1_za8_0_16: -+** mov (w1[2-5]), #?16 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_16, -+ svst1_ver_za8 (0, 16, p0, x1), -+ svst1_ver_za8 (0, 16, p0, x1)) -+ -+/* -+** st1_za8_0_w0: -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0, -+ svst1_ver_za8 (0, w0, p0, x1), -+ svst1_ver_za8 (0, w0, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p1: -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 1\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p1, -+ svst1_ver_za8 (0, w0 + 1, p0, x1), -+ svst1_ver_za8 (0, w0 + 1, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p15: -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 15\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p15, -+ svst1_ver_za8 (0, w0 + 15, p0, x1), -+ svst1_ver_za8 (0, w0 + 15, p0, x1)) -+ -+/* -+** st1_za8_0_w0_p13_index: -+** mov (w1[2-5]), w0 -+** st1b { za0v\.b\[\1, 15\] }, p0, \[x1, x2\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p13_index, -+ svst1_ver_za8 (0, w0 + 15, p0, x1 + x2), -+ 
svst1_ver_za8 (0, w0 + 15, p0, x1 + x2)) -+ -+/* -+** st1_za8_0_w0_p16: -+** add (w1[2-5]), w0, #?16 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_p16, -+ svst1_ver_za8 (0, w0 + 16, p0, x1), -+ svst1_ver_za8 (0, w0 + 16, p0, x1)) -+ -+/* -+** st1_za8_0_w0_m1: -+** sub (w1[2-5]), w0, #?1 -+** st1b { za0v\.b\[\1, 0\] }, p0, \[x1\] -+** ret -+*/ -+TEST_STORE_ZA (st1_za8_0_w0_m1, -+ svst1_ver_za8 (0, w0 - 1, p0, x1), -+ svst1_ver_za8 (0, w0 - 1, p0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c -new file mode 100644 -index 000000000..3ef7e0c09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_s.c -@@ -0,0 +1,147 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** str_vnum_za_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_0_0, -+ svstr_vnum_za (0, x1, 0), -+ svstr_vnum_za (0, x1, 0)) -+ -+/* -+** str_vnum_za_0_1: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_0_1, -+ svstr_vnum_za (0, x1, 1), -+ svstr_vnum_za (0, x1, 1)) -+ -+/* -+** str_vnum_za_1_0: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_1_0, -+ svstr_vnum_za (1, x1, 0), -+ svstr_vnum_za (1, x1, 0)) -+ -+/* -+** str_vnum_za_1_2: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_1_2, -+ svstr_vnum_za (1, x1, 2), -+ svstr_vnum_za (1, x1, 2)) -+ -+/* -+** str_vnum_za_w0_0: -+** mov (w1[2-5]), w0 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_0, -+ svstr_vnum_za (w0, x1, 0), -+ svstr_vnum_za (w0, x1, 0)) -+ -+/* -+** str_vnum_za_w0_1: -+** mov (w1[2-5]), w0 -+** 
str za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_1, -+ svstr_vnum_za (w0, x1, 1), -+ svstr_vnum_za (w0, x1, 1)) -+ -+/* -+** str_vnum_za_w0_13: -+** mov (w1[2-5]), w0 -+** str za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_13, -+ svstr_vnum_za (w0, x1, 13), -+ svstr_vnum_za (w0, x1, 13)) -+ -+/* -+** str_vnum_za_w0_15: -+** mov (w1[2-5]), w0 -+** str za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_15, -+ svstr_vnum_za (w0, x1, 15), -+ svstr_vnum_za (w0, x1, 15)) -+ -+/* -+** str_vnum_za_w0_16: -+** ( -+** add (w1[2-5]), w0, #?16 -+** incb x1, all, mul #16 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_16, -+ svstr_vnum_za (w0, x1, 16), -+ svstr_vnum_za (w0, x1, 16)) -+ -+/* -+** str_vnum_za_w0_m1: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** decb x1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** decb x1 -+** sub (w1[2-5]), w0, #?1 -+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_m1, -+ svstr_vnum_za (w0, x1, -1), -+ svstr_vnum_za (w0, x1, -1)) -+ -+/* -+** str_vnum_za_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0p1_0, -+ svstr_vnum_za (w0 + 1, x1, 0), -+ svstr_vnum_za (w0 + 1, x1, 0)) -+ -+/* -+** str_vnum_za_w0m1_1: -+** sub (w1[2-5]), w0, #?1 -+** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0m1_1, -+ svstr_vnum_za (w0 - 1, x1, 1), -+ svstr_vnum_za (w0 - 1, x1, 1)) -+ -+/* -+** str_vnum_za_w0p2_3: -+** add (w1[2-5]), w0, #?2 -+** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0p2_3, -+ svstr_vnum_za (w0 + 2, x1, 3), -+ svstr_vnum_za (w0 + 2, x1, 3)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c -new file mode 100644 -index 000000000..7cd09e67c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_vnum_za_sc.c -@@ -0,0 +1,148 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** str_vnum_za_0_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_0_0, -+ svstr_vnum_za (0, x1, 0), -+ svstr_vnum_za (0, x1, 0)) -+ -+/* -+** str_vnum_za_0_1: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_0_1, -+ svstr_vnum_za (0, x1, 1), -+ svstr_vnum_za (0, x1, 1)) -+ -+/* -+** str_vnum_za_1_0: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_1_0, -+ svstr_vnum_za (1, x1, 0), -+ svstr_vnum_za (1, x1, 0)) -+ -+/* -+** str_vnum_za_1_2: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 2\], \[x1(?:, #2, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_1_2, -+ svstr_vnum_za (1, x1, 2), -+ svstr_vnum_za (1, x1, 2)) -+ -+/* -+** str_vnum_za_w0_0: -+** mov (w1[2-5]), w0 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_0, -+ svstr_vnum_za (w0, x1, 0), -+ svstr_vnum_za (w0, x1, 0)) -+ -+/* -+** str_vnum_za_w0_1: -+** mov (w1[2-5]), w0 -+** str za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_1, -+ svstr_vnum_za (w0, x1, 1), -+ svstr_vnum_za (w0, x1, 1)) -+ -+/* -+** str_vnum_za_w0_13: -+** mov (w1[2-5]), w0 -+** str za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_13, -+ svstr_vnum_za (w0, x1, 13), -+ svstr_vnum_za (w0, x1, 13)) -+ -+/* -+** str_vnum_za_w0_15: -+** mov (w1[2-5]), w0 -+** str za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ 
-+TEST_STORE_ZA (str_vnum_za_w0_15, -+ svstr_vnum_za (w0, x1, 15), -+ svstr_vnum_za (w0, x1, 15)) -+ -+/* -+** str_vnum_za_w0_16: -+** ( -+** add (w1[2-5]), w0, #?16 -+** addsvl (x[0-9]+), x1, #16 -+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** addsvl (x[0-9]+), x1, #16 -+** add (w1[2-5]), w0, #?16 -+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_16, -+ svstr_vnum_za (w0, x1, 16), -+ svstr_vnum_za (w0, x1, 16)) -+ -+/* -+** str_vnum_za_w0_m1: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** addsvl (x[0-9]+), x1, #-1 -+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** addsvl (x[0-9]+), x1, #-1 -+** sub (w1[2-5]), w0, #?1 -+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0_m1, -+ svstr_vnum_za (w0, x1, -1), -+ svstr_vnum_za (w0, x1, -1)) -+ -+/* -+** str_vnum_za_w0p1_0: -+** add (w1[2-5]), w0, #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0p1_0, -+ svstr_vnum_za (w0 + 1, x1, 0), -+ svstr_vnum_za (w0 + 1, x1, 0)) -+ -+/* -+** str_vnum_za_w0m1_1: -+** sub (w1[2-5]), w0, #?1 -+** str za\[\1, 1\], \[x1(?:, #1, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0m1_1, -+ svstr_vnum_za (w0 - 1, x1, 1), -+ svstr_vnum_za (w0 - 1, x1, 1)) -+ -+/* -+** str_vnum_za_w0p2_3: -+** add (w1[2-5]), w0, #?2 -+** str za\[\1, 3\], \[x1(?:, #3, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_vnum_za_w0p2_3, -+ svstr_vnum_za (w0 + 2, x1, 3), -+ svstr_vnum_za (w0 + 2, x1, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c -new file mode 100644 -index 000000000..4d953c596 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_s.c -@@ -0,0 +1,124 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** str_za_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 0\], \[x1(?:, #0, 
mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_0, -+ svstr_za (0, x1), -+ svstr_za (0, x1)) -+ -+/* -+** str_za_1: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_1, -+ svstr_za (1, x1), -+ svstr_za (1, x1)) -+ -+/* -+** str_za_w0: -+** mov (w1[2-5]), w0 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0, -+ svstr_za (w0, x1), -+ svstr_za (w0, x1)) -+ -+/* -+** str_za_w0_1_vnum: -+** mov (w1[2-5]), w0 -+** str za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_1_vnum, -+ svstr_za (w0 + 1, x1 + svcntsb ()), -+ svstr_za (w0 + 1, x1 + svcntsb ())) -+ -+/* -+** str_za_w0_13_vnum: -+** mov (w1[2-5]), w0 -+** str za\[\1, 13\], \[x1, #13, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_13_vnum, -+ svstr_za (w0 + 13, x1 + svcntsb () * 13), -+ svstr_za (w0 + 13, x1 + svcntsb () * 13)) -+ -+/* -+** str_za_w0_15_vnum: -+** mov (w1[2-5]), w0 -+** str za\[\1, 15\], \[x1, #15, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_15_vnum, -+ svstr_za (w0 + 15, x1 + svcntsb () * 15), -+ svstr_za (w0 + 15, x1 + svcntsb () * 15)) -+ -+/* -+** str_za_w0_16_vnum: -+** ( -+** add (w1[2-5]), w0, #?16 -+** incb x1, all, mul #16 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** incb x1, all, mul #16 -+** add (w1[2-5]), w0, #?16 -+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_16_vnum, -+ svstr_za (w0 + 16, x1 + svcntsb () * 16), -+ svstr_za (w0 + 16, x1 + svcntsb () * 16)) -+ -+/* -+** str_za_w0_m1_vnum: -+** ( -+** sub (w1[2-5]), w0, #?1 -+** decb x1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** | -+** decb x1 -+** sub (w1[2-5]), w0, #?1 -+** str za\[\2, 0\], \[x1(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_m1_vnum, -+ svstr_za (w0 - 1, x1 - svcntsb ()), -+ svstr_za (w0 - 1, x1 - svcntsb ())) -+ -+/* -+** str_za_w0p2: -+** add (w1[2-5]), w0, #?2 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] 
-+** ret -+*/ -+TEST_STORE_ZA (str_za_w0p2, -+ svstr_za (w0 + 2, x1), -+ svstr_za (w0 + 2, x1)) -+ -+/* -+** str_za_offset: -+** ( -+** mov (w1[2-5]), w0 -+** add (x[0-9]+), x1, #?1 -+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** add (x[0-9]+), x1, #?1 -+** mov (w1[2-5]), w0 -+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_za_offset, -+ svstr_za (w0, x1 + 1), -+ svstr_za (w0, x1 + 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c -new file mode 100644 -index 000000000..3406055e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/str_za_sc.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** str_za_0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_0, -+ svstr_za (0, x1), -+ svstr_za (0, x1)) -+ -+/* -+** str_za_1: -+** mov (w1[2-5]), #?1 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_1, -+ svstr_za (1, x1), -+ svstr_za (1, x1)) -+ -+/* -+** str_za_w0: -+** mov (w1[2-5]), w0 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0, -+ svstr_za (w0, x1), -+ svstr_za (w0, x1)) -+ -+/* -+** str_za_w0_1_vnum: -+** mov (w1[2-5]), w0 -+** str za\[\1, 1\], \[x1, #1, mul vl\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0_1_vnum, -+ svstr_za (w0 + 1, x1 + svcntsb ()), -+ svstr_za (w0 + 1, x1 + svcntsb ())) -+ -+/* -+** str_za_w0p2: -+** add (w1[2-5]), w0, #?2 -+** str za\[\1, 0\], \[x1(?:, #0, mul vl)?\] -+** ret -+*/ -+TEST_STORE_ZA (str_za_w0p2, -+ svstr_za (w0 + 2, x1), -+ svstr_za (w0 + 2, x1)) -+ -+/* -+** str_za_offset: -+** ( -+** mov (w1[2-5]), w0 -+** add (x[0-9]+), x1, #?1 -+** str za\[\1, 0\], \[\2(?:, #0, mul vl)?\] -+** | -+** add (x[0-9]+), x1, #?1 -+** mov (w1[2-5]), w0 
-+** str za\[\4, 0\], \[\3(?:, #0, mul vl)?\] -+** ) -+** ret -+*/ -+TEST_STORE_ZA (str_za_offset, -+ svstr_za (w0, x1 + 1), -+ svstr_za (w0, x1 + 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c -new file mode 100644 -index 000000000..9dd66f722 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** sumopa_za32_s8_0_p0_p1_z0_z4: -+** sumopa za0\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t, -+ svsumopa_za32_s8_m (0, p0, p1, z0, z4), -+ svsumopa_za32_m (0, p0, p1, z0, z4)) -+ -+/* -+** sumopa_za32_s8_0_p1_p0_z4_z0: -+** sumopa za0\.s, p1/m, p0/m, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t, -+ svsumopa_za32_s8_m (0, p1, p0, z4, z0), -+ svsumopa_za32_m (0, p1, p0, z4, z0)) -+ -+/* -+** sumopa_za32_s8_3_p0_p1_z0_z4: -+** sumopa za3\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t, -+ svsumopa_za32_s8_m (3, p0, p1, z0, z4), -+ svsumopa_za32_m (3, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c -new file mode 100644 -index 000000000..2a78ab85d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumopa_za64.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** sumopa_za64_s16_0_p0_p1_z0_z4: -+** sumopa za0\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t, -+ svsumopa_za64_s16_m (0, p0, p1, z0, z4), -+ svsumopa_za64_m (0, p0, p1, z0, z4)) -+ -+/* -+** 
sumopa_za64_s16_0_p1_p0_z4_z0: -+** sumopa za0\.d, p1/m, p0/m, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t, -+ svsumopa_za64_s16_m (0, p1, p0, z4, z0), -+ svsumopa_za64_m (0, p1, p0, z4, z0)) -+ -+/* -+** sumopa_za64_s16_7_p0_p1_z0_z4: -+** sumopa za7\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumopa_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t, -+ svsumopa_za64_s16_m (7, p0, p1, z0, z4), -+ svsumopa_za64_m (7, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c -new file mode 100644 -index 000000000..55cb92d1b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** sumops_za32_s8_0_p0_p1_z0_z4: -+** sumops za0\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za32_s8_0_p0_p1_z0_z4, svint8_t, svuint8_t, -+ svsumops_za32_s8_m (0, p0, p1, z0, z4), -+ svsumops_za32_m (0, p0, p1, z0, z4)) -+ -+/* -+** sumops_za32_s8_0_p1_p0_z4_z0: -+** sumops za0\.s, p1/m, p0/m, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za32_s8_0_p1_p0_z4_z0, svuint8_t, svint8_t, -+ svsumops_za32_s8_m (0, p1, p0, z4, z0), -+ svsumops_za32_m (0, p1, p0, z4, z0)) -+ -+/* -+** sumops_za32_s8_3_p0_p1_z0_z4: -+** sumops za3\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za32_s8_3_p0_p1_z0_z4, svint8_t, svuint8_t, -+ svsumops_za32_s8_m (3, p0, p1, z0, z4), -+ svsumops_za32_m (3, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c -new file mode 100644 -index 000000000..910a45b29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/sumops_za64.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ 
-+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** sumops_za64_s16_0_p0_p1_z0_z4: -+** sumops za0\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za64_s16_0_p0_p1_z0_z4, svint16_t, svuint16_t, -+ svsumops_za64_s16_m (0, p0, p1, z0, z4), -+ svsumops_za64_m (0, p0, p1, z0, z4)) -+ -+/* -+** sumops_za64_s16_0_p1_p0_z4_z0: -+** sumops za0\.d, p1/m, p0/m, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za64_s16_0_p1_p0_z4_z0, svuint16_t, svint16_t, -+ svsumops_za64_s16_m (0, p1, p0, z4, z0), -+ svsumops_za64_m (0, p1, p0, z4, z0)) -+ -+/* -+** sumops_za64_s16_7_p0_p1_z0_z4: -+** sumops za7\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (sumops_za64_s16_7_p0_p1_z0_z4, svint16_t, svuint16_t, -+ svsumops_za64_s16_m (7, p0, p1, z0, z4), -+ svsumops_za64_m (7, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h -new file mode 100644 -index 000000000..aaadab2f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/test_sme_acle.h -@@ -0,0 +1,62 @@ -+#ifndef TEST_SME_ACLE_H -+#define TEST_SME_ACLE_H 1 -+ -+#if (!defined(STREAMING_COMPATIBLE) \ -+ && !defined(NON_STREAMING) \ -+ && !defined(STREAMING)) -+#define STREAMING -+#endif -+ -+#if !defined(NO_SHARED_ZA) -+#define SHARED_ZA -+#endif -+ -+#include "../../sve/acle/asm/test_sve_acle.h" -+ -+#include -+ -+#define TEST_LOAD_ZA(NAME, CODE1, CODE2) \ -+ PROTO (NAME, void, (svbool_t p0, int32_t w0, const char *x1, \ -+ uint64_t x2)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_STORE_ZA(NAME, CODE1, CODE2) \ -+ PROTO (NAME, void, (svbool_t p0, int32_t w0, char *x1, \ -+ uint64_t x2)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_READ_ZA(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, svbool_t p0, \ -+ int32_t w0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define 
TEST_WRITE_ZA(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \ -+ int32_t w0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_UNIFORM_ZA(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (TYPE z0, TYPE z1, svbool_t p0, \ -+ svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_DUAL_ZA(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, void, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \ -+ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c -new file mode 100644 -index 000000000..5474328fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/undef_za.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** undef_za_1: -+** ret -+*/ -+PROTO (undef_za_1, void, ()) { svundef_za (); } -+ -+/* -+** undef_za_2: -+** ret -+*/ -+PROTO (undef_za_2, void, ()) -+{ -+ svzero_za (); -+ svundef_za (); -+} -+ -+/* -+** undef_za_3: -+** mov (w1[2-5]), (?:wzr|#?0) -+** str za\[\1, 0\], \[x0(?:, #0, mul vl)\] -+** ret -+*/ -+PROTO (undef_za_3, void, (void *ptr)) -+{ -+ svzero_za (); -+ svundef_za (); -+ svstr_za (0, ptr); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c -new file mode 100644 -index 000000000..bbc0b6c11 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** usmopa_za32_u8_0_p0_p1_z0_z4: -+** usmopa za0\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t, -+ svusmopa_za32_u8_m (0, 
p0, p1, z0, z4), -+ svusmopa_za32_m (0, p0, p1, z0, z4)) -+ -+/* -+** usmopa_za32_u8_0_p1_p0_z4_z0: -+** usmopa za0\.s, p1/m, p0/m, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t, -+ svusmopa_za32_u8_m (0, p1, p0, z4, z0), -+ svusmopa_za32_m (0, p1, p0, z4, z0)) -+ -+/* -+** usmopa_za32_u8_3_p0_p1_z0_z4: -+** usmopa za3\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t, -+ svusmopa_za32_u8_m (3, p0, p1, z0, z4), -+ svusmopa_za32_m (3, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c -new file mode 100644 -index 000000000..64ee25bc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmopa_za64.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** usmopa_za64_u16_0_p0_p1_z0_z4: -+** usmopa za0\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t, -+ svusmopa_za64_u16_m (0, p0, p1, z0, z4), -+ svusmopa_za64_m (0, p0, p1, z0, z4)) -+ -+/* -+** usmopa_za64_u16_0_p1_p0_z4_z0: -+** usmopa za0\.d, p1/m, p0/m, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t, -+ svusmopa_za64_u16_m (0, p1, p0, z4, z0), -+ svusmopa_za64_m (0, p1, p0, z4, z0)) -+ -+/* -+** usmopa_za64_u16_7_p0_p1_z0_z4: -+** usmopa za7\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (usmopa_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t, -+ svusmopa_za64_u16_m (7, p0, p1, z0, z4), -+ svusmopa_za64_m (7, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c -new file mode 100644 -index 000000000..98fd33157 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** usmops_za32_u8_0_p0_p1_z0_z4: -+** usmops za0\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmops_za32_u8_0_p0_p1_z0_z4, svuint8_t, svint8_t, -+ svusmops_za32_u8_m (0, p0, p1, z0, z4), -+ svusmops_za32_m (0, p0, p1, z0, z4)) -+ -+/* -+** usmops_za32_u8_0_p1_p0_z4_z0: -+** usmops za0\.s, p1/m, p0/m, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmops_za32_u8_0_p1_p0_z4_z0, svint8_t, svuint8_t, -+ svusmops_za32_u8_m (0, p1, p0, z4, z0), -+ svusmops_za32_m (0, p1, p0, z4, z0)) -+ -+/* -+** usmops_za32_u8_3_p0_p1_z0_z4: -+** usmops za3\.s, p0/m, p1/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_ZA (usmops_za32_u8_3_p0_p1_z0_z4, svuint8_t, svint8_t, -+ svusmops_za32_u8_m (3, p0, p1, z0, z4), -+ svusmops_za32_m (3, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c -new file mode 100644 -index 000000000..e20cdab41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/usmops_za64.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+#pragma GCC target "+sme-i16i64" -+ -+/* -+** usmops_za64_u16_0_p0_p1_z0_z4: -+** usmops za0\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA (usmops_za64_u16_0_p0_p1_z0_z4, svuint16_t, svint16_t, -+ svusmops_za64_u16_m (0, p0, p1, z0, z4), -+ svusmops_za64_m (0, p0, p1, z0, z4)) -+ -+/* -+** usmops_za64_u16_0_p1_p0_z4_z0: -+** usmops za0\.d, p1/m, p0/m, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_ZA (usmops_za64_u16_0_p1_p0_z4_z0, svint16_t, svuint16_t, -+ svusmops_za64_u16_m (0, p1, p0, z4, z0), -+ svusmops_za64_m (0, p1, p0, z4, z0)) -+ -+/* -+** usmops_za64_u16_7_p0_p1_z0_z4: -+** usmops za7\.d, p0/m, p1/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_ZA 
(usmops_za64_u16_7_p0_p1_z0_z4, svuint16_t, svint16_t, -+ svusmops_za64_u16_m (7, p0, p1, z0, z4), -+ svusmops_za64_m (7, p0, p1, z0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c -new file mode 100644 -index 000000000..119a2535e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za128.c -@@ -0,0 +1,193 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za128_s8_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t, -+ svwrite_hor_za128_s8_m (0, 0, p0, z0), -+ svwrite_hor_za128_m (0, 0, p0, z0)) -+ -+/* -+** write_za128_s8_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t, -+ svwrite_hor_za128_s8_m (0, 1, p0, z0), -+ svwrite_hor_za128_m (0, 1, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t, -+ svwrite_hor_za128_s8_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0p1_z0: -+** add (w1[2-5]), w0, #?1 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t, -+ svwrite_hor_za128_s8_m (0, w0 + 1, p0, z0), -+ svwrite_hor_za128_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t, -+ svwrite_hor_za128_s8_m (0, w0 - 1, p0, z0), -+ svwrite_hor_za128_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za128_s8_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t, -+ 
svwrite_hor_za128_s8_m (1, w0, p0, z0), -+ svwrite_hor_za128_m (1, w0, p0, z0)) -+ -+/* -+** write_za128_s8_15_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za15h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t, -+ svwrite_hor_za128_s8_m (15, w0, p0, z0), -+ svwrite_hor_za128_m (15, w0, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z1\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t, -+ svwrite_hor_za128_s8_m (0, w0, p0, z1), -+ svwrite_hor_za128_m (0, w0, p0, z1)) -+ -+/* -+** write_za128_u8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, -+ svwrite_hor_za128_u8_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t, -+ svwrite_hor_za128_s16_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t, -+ svwrite_hor_za128_u16_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t, -+ svwrite_hor_za128_f16_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_bf16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t, -+ svwrite_hor_za128_bf16_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA 
(write_za128_s32_0_w0_z0, svint32_t, -+ svwrite_hor_za128_s32_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t, -+ svwrite_hor_za128_u32_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t, -+ svwrite_hor_za128_f32_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s64_0_w0_z0, svint64_t, -+ svwrite_hor_za128_s64_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t, -+ svwrite_hor_za128_u64_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t, -+ svwrite_hor_za128_f64_m (0, w0, p0, z0), -+ svwrite_hor_za128_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c -new file mode 100644 -index 000000000..c8f13f7bc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za16.c -@@ -0,0 +1,133 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za16_s16_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, 0, p0, z0), 
-+ svwrite_hor_za16_m (0, 0, p0, z0)) -+ -+/* -+** write_za16_s16_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, 1, p0, z0), -+ svwrite_hor_za16_m (0, 1, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0, p0, z0), -+ svwrite_hor_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 1\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0 + 1, p0, z0), -+ svwrite_hor_za16_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p7_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 7\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0 + 7, p0, z0), -+ svwrite_hor_za16_m (0, w0 + 7, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p8_z0: -+** add (w1[2-5]), w0, #?8 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0 + 8, p0, z0), -+ svwrite_hor_za16_m (0, w0 + 8, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0 - 1, p0, z0), -+ svwrite_hor_za16_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za16_s16_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t, -+ svwrite_hor_za16_s16_m (1, w0, p0, z0), -+ svwrite_hor_za16_m (1, w0, p0, z0)) -+ -+/* -+** write_za16_s16_1_w0p7_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.h\[\1, 7\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t, 
-+ svwrite_hor_za16_s16_m (1, w0 + 7, p0, z0), -+ svwrite_hor_za16_m (1, w0 + 7, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 0\], p0/m, z1\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t, -+ svwrite_hor_za16_s16_m (0, w0, p0, z1), -+ svwrite_hor_za16_m (0, w0, p0, z1)) -+ -+/* -+** write_za16_u16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t, -+ svwrite_hor_za16_u16_m (0, w0, p0, z0), -+ svwrite_hor_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_f16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_f16_0_w0_z0, svfloat16_t, -+ svwrite_hor_za16_f16_m (0, w0, p0, z0), -+ svwrite_hor_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_bf16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t, -+ svwrite_hor_za16_bf16_m (0, w0, p0, z0), -+ svwrite_hor_za16_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c -new file mode 100644 -index 000000000..ea2f5ae89 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za32.c -@@ -0,0 +1,143 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za32_s32_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, 0, p0, z0), -+ svwrite_hor_za32_m (0, 0, p0, z0)) -+ -+/* -+** write_za32_s32_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, 1, p0, z0), -+ svwrite_hor_za32_m (0, 1, p0, z0)) -+ -+/* -+** 
write_za32_s32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0, p0, z0), -+ svwrite_hor_za32_m (0, w0, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 1\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0 + 1, p0, z0), -+ svwrite_hor_za32_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0 + 3, p0, z0), -+ svwrite_hor_za32_m (0, w0 + 3, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p4_z0: -+** add (w1[2-5]), w0, #?4 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0 + 4, p0, z0), -+ svwrite_hor_za32_m (0, w0 + 4, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0 - 1, p0, z0), -+ svwrite_hor_za32_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za32_s32_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, -+ svwrite_hor_za32_s32_m (1, w0, p0, z0), -+ svwrite_hor_za32_m (1, w0, p0, z0)) -+ -+/* -+** write_za32_s32_1_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, -+ svwrite_hor_za32_s32_m (1, w0 + 3, p0, z0), -+ svwrite_hor_za32_m (1, w0 + 3, p0, z0)) -+ -+/* -+** write_za32_s32_3_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za3h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, -+ svwrite_hor_za32_s32_m (3, w0, p0, 
z0), -+ svwrite_hor_za32_m (3, w0, p0, z0)) -+ -+/* -+** write_za32_s32_3_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za3h\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, -+ svwrite_hor_za32_s32_m (3, w0 + 3, p0, z0), -+ svwrite_hor_za32_m (3, w0 + 3, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 0\], p0/m, z1\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, -+ svwrite_hor_za32_s32_m (0, w0, p0, z1), -+ svwrite_hor_za32_m (0, w0, p0, z1)) -+ -+/* -+** write_za32_u32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, -+ svwrite_hor_za32_u32_m (0, w0, p0, z0), -+ svwrite_hor_za32_m (0, w0, p0, z0)) -+ -+/* -+** write_za32_f32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, -+ svwrite_hor_za32_f32_m (0, w0, p0, z0), -+ svwrite_hor_za32_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c -new file mode 100644 -index 000000000..2b0a157d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za64.c -@@ -0,0 +1,133 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za64_s64_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, 0, p0, z0), -+ svwrite_hor_za64_m (0, 0, p0, z0)) -+ -+/* -+** write_za64_s64_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, 1, p0, z0), -+ svwrite_hor_za64_m (0, 1, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0_z0: -+** mov 
(w1[2-5]), w0 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, w0, p0, z0), -+ svwrite_hor_za64_m (0, w0, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, w0 + 1, p0, z0), -+ svwrite_hor_za64_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0p2_z0: -+** add (w1[2-5]), w0, #?2 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, w0 + 2, p0, z0), -+ svwrite_hor_za64_m (0, w0 + 2, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, -+ svwrite_hor_za64_s64_m (0, w0 - 1, p0, z0), -+ svwrite_hor_za64_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za64_s64_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, -+ svwrite_hor_za64_s64_m (1, w0, p0, z0), -+ svwrite_hor_za64_m (1, w0, p0, z0)) -+ -+/* -+** write_za64_s64_1_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za1h\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, -+ svwrite_hor_za64_s64_m (1, w0 + 1, p0, z0), -+ svwrite_hor_za64_m (1, w0 + 1, p0, z0)) -+ -+/* -+** write_za64_s64_7_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za7h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, -+ svwrite_hor_za64_s64_m (7, w0, p0, z0), -+ svwrite_hor_za64_m (7, w0, p0, z0)) -+ -+/* -+** write_za64_s64_7_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za7h\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, -+ svwrite_hor_za64_s64_m (7, w0 + 1, p0, z0), -+ svwrite_hor_za64_m (7, w0 + 1, 
p0, z0)) -+ -+/* -+** write_za64_s64_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0h\.d\[\1, 0\], p0/m, z1\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, -+ svwrite_hor_za64_s64_m (0, w0, p0, z1), -+ svwrite_hor_za64_m (0, w0, p0, z1)) -+ -+/* -+** write_za64_u64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, -+ svwrite_hor_za64_u64_m (0, w0, p0, z0), -+ svwrite_hor_za64_m (0, w0, p0, z0)) -+ -+/* -+** write_za64_f64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, -+ svwrite_hor_za64_f64_m (0, w0, p0, z0), -+ svwrite_hor_za64_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c -new file mode 100644 -index 000000000..683e1a64a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_hor_za8.c -@@ -0,0 +1,93 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za8_s8_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, 0, p0, z0), -+ svwrite_hor_za8_m (0, 0, p0, z0)) -+ -+/* -+** write_za8_s8_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, 1, p0, z0), -+ svwrite_hor_za8_m (0, 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0, p0, z0), -+ svwrite_hor_za8_m (0, w0, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.b\[\1, 1\], p0/m, z0\.b -+** ret -+*/ 
-+TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0 + 1, p0, z0), -+ svwrite_hor_za8_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p15_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.b\[\1, 15\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0 + 15, p0, z0), -+ svwrite_hor_za8_m (0, w0 + 15, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p16_z0: -+** add (w1[2-5]), w0, #?16 -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0 + 16, p0, z0), -+ svwrite_hor_za8_m (0, w0 + 16, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0 - 1, p0, z0), -+ svwrite_hor_za8_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0h\.b\[\1, 0\], p0/m, z1\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, -+ svwrite_hor_za8_s8_m (0, w0, p0, z1), -+ svwrite_hor_za8_m (0, w0, p0, z1)) -+ -+/* -+** write_za8_u8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0h\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, -+ svwrite_hor_za8_u8_m (0, w0, p0, z0), -+ svwrite_hor_za8_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c -new file mode 100644 -index 000000000..9622e99dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za128.c -@@ -0,0 +1,193 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za128_s8_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_0_z0, svint8_t, -+ svwrite_ver_za128_s8_m (0, 0, 
p0, z0), -+ svwrite_ver_za128_m (0, 0, p0, z0)) -+ -+/* -+** write_za128_s8_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_1_z0, svint8_t, -+ svwrite_ver_za128_s8_m (0, 1, p0, z0), -+ svwrite_ver_za128_m (0, 1, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0_z0, svint8_t, -+ svwrite_ver_za128_s8_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0p1_z0: -+** add (w1[2-5]), w0, #?1 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0p1_z0, svint8_t, -+ svwrite_ver_za128_s8_m (0, w0 + 1, p0, z0), -+ svwrite_ver_za128_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0m1_z0, svint8_t, -+ svwrite_ver_za128_s8_m (0, w0 - 1, p0, z0), -+ svwrite_ver_za128_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za128_s8_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_1_w0_z0, svint8_t, -+ svwrite_ver_za128_s8_m (1, w0, p0, z0), -+ svwrite_ver_za128_m (1, w0, p0, z0)) -+ -+/* -+** write_za128_s8_15_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za15v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_15_w0_z0, svint8_t, -+ svwrite_ver_za128_s8_m (15, w0, p0, z0), -+ svwrite_ver_za128_m (15, w0, p0, z0)) -+ -+/* -+** write_za128_s8_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z1\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s8_0_w0_z1, svint8_t, -+ svwrite_ver_za128_s8_m (0, w0, p0, z1), -+ svwrite_ver_za128_m (0, w0, p0, z1)) -+ -+/* -+** write_za128_u8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u8_0_w0_z0, svuint8_t, -+ 
svwrite_ver_za128_u8_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s16_0_w0_z0, svint16_t, -+ svwrite_ver_za128_s16_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u16_0_w0_z0, svuint16_t, -+ svwrite_ver_za128_u16_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f16_0_w0_z0, svfloat16_t, -+ svwrite_ver_za128_f16_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_bf16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_bf16_0_w0_z0, svbfloat16_t, -+ svwrite_ver_za128_bf16_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_s32_0_w0_z0, svint32_t, -+ svwrite_ver_za128_s32_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u32_0_w0_z0, svuint32_t, -+ svwrite_ver_za128_u32_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f32_0_w0_z0, svfloat32_t, -+ svwrite_ver_za128_f32_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_s64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA 
(write_za128_s64_0_w0_z0, svint64_t, -+ svwrite_ver_za128_s64_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_u64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_u64_0_w0_z0, svuint64_t, -+ svwrite_ver_za128_u64_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -+ -+/* -+** write_za128_f64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.q\[\1, 0\], p0/m, z0\.q -+** ret -+*/ -+TEST_WRITE_ZA (write_za128_f64_0_w0_z0, svfloat64_t, -+ svwrite_ver_za128_f64_m (0, w0, p0, z0), -+ svwrite_ver_za128_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c -new file mode 100644 -index 000000000..5430f2307 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za16.c -@@ -0,0 +1,133 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za16_s16_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_0_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, 0, p0, z0), -+ svwrite_ver_za16_m (0, 0, p0, z0)) -+ -+/* -+** write_za16_s16_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_1_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, 1, p0, z0), -+ svwrite_ver_za16_m (0, 1, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0, p0, z0), -+ svwrite_ver_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 1\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p1_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0 + 1, p0, z0), -+ 
svwrite_ver_za16_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p7_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 7\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p7_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0 + 7, p0, z0), -+ svwrite_ver_za16_m (0, w0 + 7, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0p8_z0: -+** add (w1[2-5]), w0, #?8 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0p8_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0 + 8, p0, z0), -+ svwrite_ver_za16_m (0, w0 + 8, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0m1_z0, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0 - 1, p0, z0), -+ svwrite_ver_za16_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za16_s16_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_1_w0_z0, svint16_t, -+ svwrite_ver_za16_s16_m (1, w0, p0, z0), -+ svwrite_ver_za16_m (1, w0, p0, z0)) -+ -+/* -+** write_za16_s16_1_w0p7_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.h\[\1, 7\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_1_w0p7_z0, svint16_t, -+ svwrite_ver_za16_s16_m (1, w0 + 7, p0, z0), -+ svwrite_ver_za16_m (1, w0 + 7, p0, z0)) -+ -+/* -+** write_za16_s16_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 0\], p0/m, z1\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_s16_0_w0_z1, svint16_t, -+ svwrite_ver_za16_s16_m (0, w0, p0, z1), -+ svwrite_ver_za16_m (0, w0, p0, z1)) -+ -+/* -+** write_za16_u16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_u16_0_w0_z0, svuint16_t, -+ svwrite_ver_za16_u16_m (0, w0, p0, z0), -+ svwrite_ver_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_f16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_f16_0_w0_z0, 
svfloat16_t, -+ svwrite_ver_za16_f16_m (0, w0, p0, z0), -+ svwrite_ver_za16_m (0, w0, p0, z0)) -+ -+/* -+** write_za16_bf16_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.h\[\1, 0\], p0/m, z0\.h -+** ret -+*/ -+TEST_WRITE_ZA (write_za16_bf16_0_w0_z0, svbfloat16_t, -+ svwrite_ver_za16_bf16_m (0, w0, p0, z0), -+ svwrite_ver_za16_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c -new file mode 100644 -index 000000000..960ce163d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za32.c -@@ -0,0 +1,143 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za32_s32_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_0_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, 0, p0, z0), -+ svwrite_ver_za32_m (0, 0, p0, z0)) -+ -+/* -+** write_za32_s32_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_1_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, 1, p0, z0), -+ svwrite_ver_za32_m (0, 1, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0, p0, z0), -+ svwrite_ver_za32_m (0, w0, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 1\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p1_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0 + 1, p0, z0), -+ svwrite_ver_za32_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p3_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0 + 3, p0, z0), -+ svwrite_ver_za32_m (0, w0 + 
3, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0p4_z0: -+** add (w1[2-5]), w0, #?4 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0p4_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0 + 4, p0, z0), -+ svwrite_ver_za32_m (0, w0 + 4, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0m1_z0, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0 - 1, p0, z0), -+ svwrite_ver_za32_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za32_s32_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_1_w0_z0, svint32_t, -+ svwrite_ver_za32_s32_m (1, w0, p0, z0), -+ svwrite_ver_za32_m (1, w0, p0, z0)) -+ -+/* -+** write_za32_s32_1_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_1_w0p3_z0, svint32_t, -+ svwrite_ver_za32_s32_m (1, w0 + 3, p0, z0), -+ svwrite_ver_za32_m (1, w0 + 3, p0, z0)) -+ -+/* -+** write_za32_s32_3_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za3v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_3_w0_z0, svint32_t, -+ svwrite_ver_za32_s32_m (3, w0, p0, z0), -+ svwrite_ver_za32_m (3, w0, p0, z0)) -+ -+/* -+** write_za32_s32_3_w0p3_z0: -+** mov (w1[2-5]), w0 -+** mova za3v\.s\[\1, 3\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_3_w0p3_z0, svint32_t, -+ svwrite_ver_za32_s32_m (3, w0 + 3, p0, z0), -+ svwrite_ver_za32_m (3, w0 + 3, p0, z0)) -+ -+/* -+** write_za32_s32_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 0\], p0/m, z1\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_s32_0_w0_z1, svint32_t, -+ svwrite_ver_za32_s32_m (0, w0, p0, z1), -+ svwrite_ver_za32_m (0, w0, p0, z1)) -+ -+/* -+** write_za32_u32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_u32_0_w0_z0, svuint32_t, -+ svwrite_ver_za32_u32_m 
(0, w0, p0, z0), -+ svwrite_ver_za32_m (0, w0, p0, z0)) -+ -+/* -+** write_za32_f32_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.s\[\1, 0\], p0/m, z0\.s -+** ret -+*/ -+TEST_WRITE_ZA (write_za32_f32_0_w0_z0, svfloat32_t, -+ svwrite_ver_za32_f32_m (0, w0, p0, z0), -+ svwrite_ver_za32_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c -new file mode 100644 -index 000000000..962c4002e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za64.c -@@ -0,0 +1,133 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za64_s64_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_0_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, 0, p0, z0), -+ svwrite_ver_za64_m (0, 0, p0, z0)) -+ -+/* -+** write_za64_s64_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_1_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, 1, p0, z0), -+ svwrite_ver_za64_m (0, 1, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, w0, p0, z0), -+ svwrite_ver_za64_m (0, w0, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0p1_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, w0 + 1, p0, z0), -+ svwrite_ver_za64_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0p2_z0: -+** add (w1[2-5]), w0, #?2 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0p2_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, w0 + 2, p0, z0), -+ svwrite_ver_za64_m (0, w0 + 2, p0, z0)) -+ -+/* -+** 
write_za64_s64_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0m1_z0, svint64_t, -+ svwrite_ver_za64_s64_m (0, w0 - 1, p0, z0), -+ svwrite_ver_za64_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za64_s64_1_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_1_w0_z0, svint64_t, -+ svwrite_ver_za64_s64_m (1, w0, p0, z0), -+ svwrite_ver_za64_m (1, w0, p0, z0)) -+ -+/* -+** write_za64_s64_1_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za1v\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_1_w0p1_z0, svint64_t, -+ svwrite_ver_za64_s64_m (1, w0 + 1, p0, z0), -+ svwrite_ver_za64_m (1, w0 + 1, p0, z0)) -+ -+/* -+** write_za64_s64_7_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za7v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_7_w0_z0, svint64_t, -+ svwrite_ver_za64_s64_m (7, w0, p0, z0), -+ svwrite_ver_za64_m (7, w0, p0, z0)) -+ -+/* -+** write_za64_s64_7_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za7v\.d\[\1, 1\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_7_w0p1_z0, svint64_t, -+ svwrite_ver_za64_s64_m (7, w0 + 1, p0, z0), -+ svwrite_ver_za64_m (7, w0 + 1, p0, z0)) -+ -+/* -+** write_za64_s64_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0v\.d\[\1, 0\], p0/m, z1\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_s64_0_w0_z1, svint64_t, -+ svwrite_ver_za64_s64_m (0, w0, p0, z1), -+ svwrite_ver_za64_m (0, w0, p0, z1)) -+ -+/* -+** write_za64_u64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_u64_0_w0_z0, svuint64_t, -+ svwrite_ver_za64_u64_m (0, w0, p0, z0), -+ svwrite_ver_za64_m (0, w0, p0, z0)) -+ -+/* -+** write_za64_f64_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.d\[\1, 0\], p0/m, z0\.d -+** ret -+*/ -+TEST_WRITE_ZA (write_za64_f64_0_w0_z0, svfloat64_t, -+ svwrite_ver_za64_f64_m (0, w0, p0, z0), -+ svwrite_ver_za64_m 
(0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c -new file mode 100644 -index 000000000..dd6182821 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/write_ver_za8.c -@@ -0,0 +1,93 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sme_acle.h" -+ -+/* -+** write_za8_s8_0_0_z0: -+** mov (w1[2-5]), (?:wzr|#?0) -+** mova za0v\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_0_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, 0, p0, z0), -+ svwrite_ver_za8_m (0, 0, p0, z0)) -+ -+/* -+** write_za8_s8_0_1_z0: -+** mov (w1[2-5]), #?1 -+** mova za0v\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_1_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, 1, p0, z0), -+ svwrite_ver_za8_m (0, 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0, p0, z0), -+ svwrite_ver_za8_m (0, w0, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p1_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.b\[\1, 1\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0p1_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0 + 1, p0, z0), -+ svwrite_ver_za8_m (0, w0 + 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p15_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.b\[\1, 15\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0p15_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0 + 15, p0, z0), -+ svwrite_ver_za8_m (0, w0 + 15, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0p16_z0: -+** add (w1[2-5]), w0, #?16 -+** mova za0v\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0p16_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0 + 16, p0, z0), -+ svwrite_ver_za8_m (0, w0 + 16, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0m1_z0: -+** sub (w1[2-5]), w0, #?1 -+** mova za0v\.b\[\1, 0\], 
p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0m1_z0, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0 - 1, p0, z0), -+ svwrite_ver_za8_m (0, w0 - 1, p0, z0)) -+ -+/* -+** write_za8_s8_0_w0_z1: -+** mov (w1[2-5]), w0 -+** mova za0v\.b\[\1, 0\], p0/m, z1\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_s8_0_w0_z1, svint8_t, -+ svwrite_ver_za8_s8_m (0, w0, p0, z1), -+ svwrite_ver_za8_m (0, w0, p0, z1)) -+ -+/* -+** write_za8_u8_0_w0_z0: -+** mov (w1[2-5]), w0 -+** mova za0v\.b\[\1, 0\], p0/m, z0\.b -+** ret -+*/ -+TEST_WRITE_ZA (write_za8_u8_0_w0_z0, svuint8_t, -+ svwrite_ver_za8_u8_m (0, w0, p0, z0), -+ svwrite_ver_za8_m (0, w0, p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c -new file mode 100644 -index 000000000..9ce7331eb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_mask_za.c -@@ -0,0 +1,130 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** zero_mask_za_0: -+** zero { *} -+** ret -+*/ -+PROTO (zero_mask_za_0, void, ()) { svzero_mask_za (0); } -+ -+/* -+** zero_mask_za_01: -+** zero { za0\.d } -+** ret -+*/ -+PROTO (zero_mask_za_01, void, ()) { svzero_mask_za (0x01); } -+ -+/* -+** zero_mask_za_80: -+** zero { za7\.d } -+** ret -+*/ -+PROTO (zero_mask_za_80, void, ()) { svzero_mask_za (0x80); } -+ -+/* -+** zero_mask_za_03: -+** zero { za0\.d, za1\.d } -+** ret -+*/ -+PROTO (zero_mask_za_03, void, ()) { svzero_mask_za (0x03); } -+ -+/* -+** zero_mask_za_09: -+** zero { za0\.d, za3\.d } -+** ret -+*/ -+PROTO (zero_mask_za_09, void, ()) { svzero_mask_za (0x09); } -+ -+/* -+** zero_mask_za_0d: -+** zero { za0\.d, za2\.d, za3\.d } -+** ret -+*/ -+PROTO (zero_mask_za_0d, void, ()) { svzero_mask_za (0x0d); } -+ -+/* -+** zero_mask_za_3c: -+** zero { za2\.d, za3\.d, za4\.d, za5\.d } -+** ret -+*/ -+PROTO (zero_mask_za_3c, void, ()) { svzero_mask_za 
(0x3c); } -+ -+/* -+** zero_mask_za_5a: -+** zero { za1\.d, za3\.d, za4\.d, za6\.d } -+** ret -+*/ -+PROTO (zero_mask_za_5a, void, ()) { svzero_mask_za (0x5a); } -+ -+/* -+** zero_mask_za_11: -+** zero { za0\.s } -+** ret -+*/ -+PROTO (zero_mask_za_11, void, ()) { svzero_mask_za (0x11); } -+ -+/* -+** zero_mask_za_88: -+** zero { za3\.s } -+** ret -+*/ -+PROTO (zero_mask_za_88, void, ()) { svzero_mask_za (0x88); } -+ -+/* -+** zero_mask_za_33: -+** zero { za0\.s, za1\.s } -+** ret -+*/ -+PROTO (zero_mask_za_33, void, ()) { svzero_mask_za (0x33); } -+ -+/* -+** zero_mask_za_cc: -+** zero { za2\.s, za3\.s } -+** ret -+*/ -+PROTO (zero_mask_za_cc, void, ()) { svzero_mask_za (0xcc); } -+ -+/* -+** zero_mask_za_55: -+** zero { za0\.h } -+** ret -+*/ -+PROTO (zero_mask_za_55, void, ()) { svzero_mask_za (0x55); } -+ -+/* -+** zero_mask_za_aa: -+** zero { za1\.h } -+** ret -+*/ -+PROTO (zero_mask_za_aa, void, ()) { svzero_mask_za (0xaa); } -+ -+/* -+** zero_mask_za_ab: -+** zero { za1\.h, za0\.d } -+** ret -+*/ -+PROTO (zero_mask_za_ab, void, ()) { svzero_mask_za (0xab); } -+ -+/* -+** zero_mask_za_d7: -+** zero { za0\.h, za1\.d, za7\.d } -+** ret -+*/ -+PROTO (zero_mask_za_d7, void, ()) { svzero_mask_za (0xd7); } -+ -+/* -+** zero_mask_za_bf: -+** zero { za1\.h, za0\.s, za2\.d } -+** ret -+*/ -+PROTO (zero_mask_za_bf, void, ()) { svzero_mask_za (0xbf); } -+ -+/* -+** zero_mask_za_ff: -+** zero { za } -+** ret -+*/ -+PROTO (zero_mask_za_ff, void, ()) { svzero_mask_za (0xff); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c -new file mode 100644 -index 000000000..4688d0950 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/acle-asm/zero_za.c -@@ -0,0 +1,11 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#define STREAMING_COMPATIBLE -+#include "test_sme_acle.h" -+ -+/* -+** zero_za: -+** zero { za } -+** ret -+*/ -+PROTO (zero_za, void, ()) { svzero_za (); } 
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -index d8916809b..84925b9bd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -@@ -12,11 +12,21 @@ - #endif - - #ifdef STREAMING_COMPATIBLE --#define ATTR __arm_streaming_compatible -+#define SM_ATTR __arm_streaming_compatible -+#elif defined(STREAMING) -+#define SM_ATTR __arm_streaming - #else --#define ATTR -+#define SM_ATTR - #endif - -+#ifdef SHARED_ZA -+#define ZA_ATTR __arm_inout("za") -+#else -+#define ZA_ATTR -+#endif -+ -+#define ATTR SM_ATTR ZA_ATTR -+ - #ifdef __cplusplus - #define PROTO(NAME, RET, ARGS) \ - extern "C" RET NAME ARGS ATTR; RET NAME ARGS ATTR -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c -new file mode 100644 -index 000000000..fce1ef1dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_int_m_1.c -@@ -0,0 +1,50 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svusmopa_za32_m (0, pg, pg, u8); /* { dg-error {too few arguments to function 'svusmopa_za32_m'} } */ -+ svusmopa_za32_m (0, pg, pg, u8, s8, 0); /* { dg-error {too many arguments to function 'svusmopa_za32_m'} } */ -+ svusmopa_za32_m (tile, pg, pg, u8, s8); /* { dg-error {argument 1 of 'svusmopa_za32_m' must be an integer constant expression} } */ -+ svusmopa_za32_m (-1, pg, pg, u8, s8); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svusmopa_za32_m (4, pg, pg, u8, s8); /* { dg-error {passing 4 to argument 1 of 'svusmopa_za32_m', which 
expects a value in the range \[0, 3\]} } */ -+ svusmopa_za32_m (0, u8, pg, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ -+ svusmopa_za32_m (0, pg, u8, u8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmopa_za32_m', which expects 'svbool_t'} } */ -+ svusmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svusmopa_za32_m', which expects an SVE type} } */ -+ svusmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svint8_t' arguments} } */ -+ svusmopa_za32_m (0, pg, pg, pg, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ -+ svusmopa_za32_m (0, pg, pg, f16, s8); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ -+ svusmopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 5 of 'svusmopa_za32_m', which expects a vector of signed integers} } */ -+ svusmopa_za32_m (0, pg, pg, u8, s16); /* { dg-error {arguments 4 and 5 of 'svusmopa_za32_m' must have the same element size, but the values passed here have type 'svuint8_t' and 'svint16_t' respectively} } */ -+ svusmopa_za32_m (0, pg, pg, u16, s16); /* { dg-error {'svusmopa_za32_m' has no form that takes 'svuint16_t' arguments} } */ -+ -+ svusmopa_za64_m (0, pg, pg, u16, s16); /* { dg-error {ACLE function 'svusmopa_za64_u16_m' requires ISA extension 'sme-i16i64'} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming -+{ -+ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") -+{ -+ svusmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {ACLE function 'svusmopa_za32_u8_m' can only be called when SME streaming mode is enabled} } */ -+} -+ -+#pragma GCC target 
("arch=armv9-a+sme-i16i64") -+ -+void -+f4 (svbool_t pg, svint16_t s16, svuint16_t u16) -+ __arm_streaming __arm_inout("za") -+{ -+ svusmopa_za64_m (-1, pg, pg, u16, s16); /* { dg-error {passing -1 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svusmopa_za64_m (8, pg, pg, u16, s16); /* { dg-error {passing 8 to argument 1 of 'svusmopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c -new file mode 100644 -index 000000000..7e91a41cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_1.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svint32_t s32, -+ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svmopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svmopa_za32_m'} } */ -+ svmopa_za32_m (0, pg, pg, s8, s8, 0); /* { dg-error {too many arguments to function 'svmopa_za32_m'} } */ -+ svmopa_za32_m (tile, pg, pg, s8, s8); /* { dg-error {argument 1 of 'svmopa_za32_m' must be an integer constant expression} } */ -+ svmopa_za32_m (-1, pg, pg, s8, s8); /* { dg-error {passing -1 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svmopa_za32_m (4, pg, pg, s8, s8); /* { dg-error {passing 4 to argument 1 of 'svmopa_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svmopa_za32_m (0, u8, pg, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svmopa_za32_m', which expects 'svbool_t'} } */ -+ svmopa_za32_m (0, pg, u8, s8, s8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svmopa_za32_m', which expects 'svbool_t'} } */ -+ svmopa_za32_m (0, pg, pg, tile, s8); /* { dg-error 
{passing 'uint32_t'.* to argument 4 of 'svmopa_za32_m', which expects an SVE type} } */ -+ svmopa_za32_m (0, pg, pg, u8, s8); /* { dg-error {passing 'svint8_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svuint8_t'} } */ -+ svmopa_za32_m (0, pg, pg, s8, f16); /* { dg-error {passing 'svfloat16_t'.* to argument 5 of 'svmopa_za32_m', but argument 4 had type 'svint8_t'} } */ -+ svmopa_za32_m (0, pg, pg, pg, pg); /* { dg-error {'svmopa_za32_m' has no form that takes 'svbool_t' arguments} } */ -+ svmopa_za32_m (0, pg, pg, s16, s16); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint16_t' arguments} } */ -+ svmopa_za32_m (0, pg, pg, s32, s32); /* { dg-error {'svmopa_za32_m' has no form that takes 'svint32_t' arguments} } */ -+ svmopa_za32_m (0, pg, pg, f64, f64); /* { dg-error {'svmopa_za32_m' has no form that takes 'svfloat64_t' arguments} } */ -+ -+ svmopa_za64_m (0, pg, pg, s16, s16); /* { dg-error {ACLE function 'svmopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8) __arm_streaming -+{ -+ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint8_t s8) __arm_inout("za") -+{ -+ svmopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {ACLE function 'svmopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ -+} -+ -+#pragma GCC target ("arch=armv9-a+sme-i16i64") -+ -+void -+f4 (svbool_t pg, svint16_t s16) __arm_streaming __arm_inout("za") -+{ -+ svmopa_za64_m (-1, pg, pg, s16, s16); /* { dg-error {passing -1 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svmopa_za64_m (8, pg, pg, s16, s16); /* { dg-error {passing 8 to argument 1 of 'svmopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c -new file mode 100644 -index 000000000..dfc1b737d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_m_2.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svfloat64_t f64) __arm_streaming __arm_inout("za") -+{ -+ svmopa_za64_m (0, pg, pg, f64, f64); /* { dg-error {ACLE function 'svmopa_za64_f64_m' requires ISA extension 'sme-f64f64'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c -new file mode 100644 -index 000000000..555f95a61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_za_uint_m_1.c -@@ -0,0 +1,50 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svsumopa_za32_m (0, pg, pg, s8); /* { dg-error {too few arguments to function 'svsumopa_za32_m'} } */ -+ svsumopa_za32_m (0, pg, pg, s8, u8, 0); /* { dg-error {too many arguments to function 'svsumopa_za32_m'} } */ -+ svsumopa_za32_m (tile, pg, pg, s8, u8); /* { dg-error {argument 1 of 'svsumopa_za32_m' must be an integer constant expression} } */ -+ svsumopa_za32_m (-1, pg, pg, s8, u8); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svsumopa_za32_m (4, pg, pg, s8, u8); /* { dg-error {passing 4 to argument 1 of 'svsumopa_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svsumopa_za32_m (0, u8, pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsumopa_za32_m', which expects 'svbool_t'} } */ -+ svsumopa_za32_m (0, pg, u8, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 
'svsumopa_za32_m', which expects 'svbool_t'} } */ -+ svsumopa_za32_m (0, pg, pg, tile, s8); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svsumopa_za32_m', which expects an SVE type} } */ -+ svsumopa_za32_m (0, pg, pg, u8, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svuint8_t' arguments} } */ -+ svsumopa_za32_m (0, pg, pg, pg, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svbool_t' arguments} } */ -+ svsumopa_za32_m (0, pg, pg, f16, u8); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svfloat16_t' arguments} } */ -+ svsumopa_za32_m (0, pg, pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 5 of 'svsumopa_za32_m', which expects a vector of unsigned integers} } */ -+ svsumopa_za32_m (0, pg, pg, s8, u16); /* { dg-error {arguments 4 and 5 of 'svsumopa_za32_m' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ -+ svsumopa_za32_m (0, pg, pg, s16, u16); /* { dg-error {'svsumopa_za32_m' has no form that takes 'svint16_t' arguments} } */ -+ -+ svsumopa_za64_m (0, pg, pg, s16, u16); /* { dg-error {ACLE function 'svsumopa_za64_s16_m' requires ISA extension 'sme-i16i64'} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_streaming -+{ -+ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint8_t s8, svuint8_t u8) __arm_inout("za") -+{ -+ svsumopa_za32_m (0, pg, pg, s8, u8); /* { dg-error {ACLE function 'svsumopa_za32_s8_m' can only be called when SME streaming mode is enabled} } */ -+} -+ -+#pragma GCC target ("arch=armv9-a+sme-i16i64") -+ -+void -+f4 (svbool_t pg, svint16_t s16, svuint16_t u16) -+ __arm_streaming __arm_inout("za") -+{ -+ svsumopa_za64_m (-1, pg, pg, s16, u16); /* { dg-error {passing -1 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svsumopa_za64_m 
(8, pg, pg, s16, u16); /* { dg-error {passing 8 to argument 1 of 'svsumopa_za64_m', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c -index 9591e3d01..5aa0ea671 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c -@@ -4,6 +4,7 @@ - to be diagnosed. Any attempt to call the function before including - arm_sve.h will lead to a link failure. (Same for taking its address, - etc.) */ --extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); -+extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t) -+ __arm_streaming_compatible; - - #pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c -index 85923611d..ede9a8063 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c -@@ -8,6 +8,7 @@ - explicit definition "wins". This isn't supported behavior though. 
*/ - __SVUint8_t - svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) -+ __arm_streaming_compatible - { - return x; - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c -new file mode 100644 -index 000000000..421979ea0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/read_za_m_1.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, -+ svfloat32_t f32, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svread_hor_za8_m (s8, pg, 0); /* { dg-error {too few arguments to function 'svread_hor_za8_m'} } */ -+ svread_hor_za8_m (s8, pg, 0, 0, 0); /* { dg-error {too many arguments to function 'svread_hor_za8_m'} } */ -+ svread_hor_za8_m (tile, pg, 0, 0); /* { dg-error {passing 'uint32_t'.* to argument 1 of 'svread_hor_za8_m', which expects an SVE type} } */ -+ svread_hor_za8_m (pg, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svbool_t' arguments} } */ -+ svread_hor_za8_m (u16, pg, 0, 0); /* { dg-error {'svread_hor_za8_m' has no form that takes 'svuint16_t' arguments} } */ -+ svread_hor_za8_m (s8, s8, 0, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svread_hor_za8_m', which expects 'svbool_t'} } */ -+ svread_hor_za8_m (s8, pg, tile, 0); /* { dg-error {argument 3 of 'svread_hor_za8_m' must be an integer constant expression} } */ -+ svread_hor_za8_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ -+ svread_hor_za8_m (s8, pg, 1, 0); /* { dg-error {passing 1 to argument 3 of 'svread_hor_za8_m', which expects the value 0} } */ -+ svread_hor_za8_m (s8, pg, 0, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svread_hor_za8_m', which expects 'uint32_t'} } */ -+ -+ svread_hor_za16_m (u16, pg, -1, 0); /* { 
dg-error {passing -1 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */ -+ svread_hor_za16_m (u16, pg, 2, 0); /* { dg-error {passing 2 to argument 3 of 'svread_hor_za16_m', which expects a value in the range \[0, 1\]} } */ -+ -+ svread_hor_za32_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svread_hor_za32_m (f32, pg, 4, 0); /* { dg-error {passing 4 to argument 3 of 'svread_hor_za32_m', which expects a value in the range \[0, 3\]} } */ -+ -+ svread_hor_za64_m (s64, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svread_hor_za64_m (s64, pg, 8, 0); /* { dg-error {passing 8 to argument 3 of 'svread_hor_za64_m', which expects a value in the range \[0, 7\]} } */ -+ -+ svread_hor_za128_m (s8, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svread_hor_za128_m (s8, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svread_hor_za128_m (f32, pg, -1, 0); /* { dg-error {passing -1 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svread_hor_za128_m (f32, pg, 16, 0); /* { dg-error {passing 16 to argument 3 of 'svread_hor_za128_m', which expects a value in the range \[0, 15\]} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8) __arm_streaming -+{ -+ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint8_t s8) __arm_inout("za") -+{ -+ svread_hor_za8_m (s8, pg, 0, 0); /* { dg-error {ACLE function 'svread_hor_za8_s8_m' can only be called when SME streaming mode is enabled} } */ -+} -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c -new file mode 100644 -index 000000000..948ce2cb3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_za_m_1.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svint16_t s16, svint32_t s32, svint64_t s64, -+ svfloat32_t f32, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svaddha_za32_m (0, pg, pg); /* { dg-error {too few arguments to function 'svaddha_za32_m'} } */ -+ svaddha_za32_m (0, pg, pg, s32, s32); /* { dg-error {too many arguments to function 'svaddha_za32_m'} } */ -+ svaddha_za32_m (tile, pg, pg, s32); /* { dg-error {argument 1 of 'svaddha_za32_m' must be an integer constant expression} } */ -+ svaddha_za32_m (-1, pg, pg, s32); /* { dg-error {passing -1 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svaddha_za32_m (4, pg, pg, s32); /* { dg-error {passing 4 to argument 1 of 'svaddha_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svaddha_za32_m (0, u8, pg, s32); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svaddha_za32_m', which expects 'svbool_t'} } */ -+ svaddha_za32_m (0, pg, u8, s32); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svaddha_za32_m', which expects 'svbool_t'} } */ -+ svaddha_za32_m (0, pg, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svaddha_za32_m', which expects an SVE type} } */ -+ svaddha_za32_m (0, pg, pg, pg); /* { dg-error {'svaddha_za32_m' has no form that takes 'svbool_t' arguments} } */ -+ svaddha_za32_m (0, pg, pg, u8); /* { dg-error {'svaddha_za32_m' has no form that takes 'svuint8_t' arguments} } */ -+ svaddha_za32_m (0, pg, pg, s16); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint16_t' arguments} } */ -+ svaddha_za32_m (0, pg, pg, f32); /* { dg-error 
{'svaddha_za32_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svaddha_za32_m (0, pg, pg, s64); /* { dg-error {'svaddha_za32_m' has no form that takes 'svint64_t' arguments} } */ -+ -+ svaddha_za64_m (0, pg, pg, s64); /* { dg-error {ACLE function 'svaddha_za64_s64_m' requires ISA extension 'sme-i16i64'} } */ -+} -+ -+void -+f2 (svbool_t pg, svint32_t s32) __arm_streaming -+{ -+ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint32_t s32) __arm_inout("za") -+{ -+ svaddha_za32_m (0, pg, pg, s32); /* { dg-error {ACLE function 'svaddha_za32_s32_m' can only be called when SME streaming mode is enabled} } */ -+} -+ -+#pragma GCC target ("arch=armv9-a+sme-i16i64") -+ -+void -+f4 (svbool_t pg, svint64_t s64) -+ __arm_streaming __arm_inout("za") -+{ -+ svaddha_za64_m (-1, pg, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svaddha_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svaddha_za64_m (8, pg, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svaddha_za64_m', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c -new file mode 100644 -index 000000000..af79c406b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/write_za_m_1.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv9-a+sme") -+ -+void -+f1 (svbool_t pg, svint8_t s8, svint64_t s64, svuint8_t u8, svuint16_t u16, -+ svfloat32_t f32, uint32_t tile) -+ __arm_streaming __arm_inout("za") -+{ -+ svwrite_ver_za8_m (0, 0, pg); /* { dg-error {too few arguments to function 'svwrite_ver_za8_m'} } */ -+ svwrite_ver_za8_m (0, 0, pg, s8, 0); /* { dg-error {too many arguments to function 'svwrite_ver_za8_m'} } */ -+ svwrite_ver_za8_m (tile, 0, pg, 
s8); /* { dg-error {argument 1 of 'svwrite_ver_za8_m' must be an integer constant expression} } */ -+ svwrite_ver_za8_m (-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ -+ svwrite_ver_za8_m (1, 0, pg, s8); /* { dg-error {passing 1 to argument 1 of 'svwrite_ver_za8_m', which expects the value 0} } */ -+ svwrite_ver_za8_m (0, u8, pg, s8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svwrite_ver_za8_m', which expects 'uint32_t'} } */ -+ svwrite_ver_za8_m (0, 0, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svwrite_ver_za8_m', which expects 'svbool_t'} } */ -+ svwrite_ver_za8_m (0, 0, pg, tile); /* { dg-error {passing 'uint32_t'.* to argument 4 of 'svwrite_ver_za8_m', which expects an SVE type} } */ -+ svwrite_ver_za8_m (0, 0, pg, pg); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svbool_t' arguments} } */ -+ svwrite_ver_za8_m (0, 0, pg, u16); /* { dg-error {'svwrite_ver_za8_m' has no form that takes 'svuint16_t' arguments} } */ -+ -+ svwrite_ver_za16_m (-1, 0, pg, u16); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */ -+ svwrite_ver_za16_m (2, 0, pg, u16); /* { dg-error {passing 2 to argument 1 of 'svwrite_ver_za16_m', which expects a value in the range \[0, 1\]} } */ -+ -+ svwrite_ver_za32_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */ -+ svwrite_ver_za32_m (4, 0, pg, f32); /* { dg-error {passing 4 to argument 1 of 'svwrite_ver_za32_m', which expects a value in the range \[0, 3\]} } */ -+ -+ svwrite_ver_za64_m (-1, 0, pg, s64); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */ -+ svwrite_ver_za64_m (8, 0, pg, s64); /* { dg-error {passing 8 to argument 1 of 'svwrite_ver_za64_m', which expects a value in the range \[0, 7\]} } */ -+ -+ svwrite_ver_za128_m 
(-1, 0, pg, s8); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svwrite_ver_za128_m (16, 0, pg, s8); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svwrite_ver_za128_m (-1, 0, pg, f32); /* { dg-error {passing -1 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ -+ svwrite_ver_za128_m (16, 0, pg, f32); /* { dg-error {passing 16 to argument 1 of 'svwrite_ver_za128_m', which expects a value in the range \[0, 15\]} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8) __arm_streaming -+{ -+ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called from a function that has 'za' state} } */ -+} -+ -+void -+f3 (svbool_t pg, svint8_t s8) __arm_inout("za") -+{ -+ svwrite_ver_za8_m (0, 0, pg, s8); /* { dg-error {ACLE function 'svwrite_ver_za8_s8_m' can only be called when SME streaming mode is enabled} } */ -+} -diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp -index e2a9ef5fa..292737dae 100644 ---- a/gcc/testsuite/lib/target-supports.exp -+++ b/gcc/testsuite/lib/target-supports.exp -@@ -10622,7 +10622,8 @@ proc check_effective_target_aarch64_tiny { } { - # various architecture extensions via the .arch_extension pseudo-op. 
- - foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" -- "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" } { -+ "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" -+ "sme" "sme-i16i64" } { - eval [string map [list FUNC $aarch64_ext] { - proc check_effective_target_aarch64_asm_FUNC_ok { } { - if { [istarget aarch64*-*-*] } { --- -2.33.0 - diff --git a/0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch b/0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch deleted file mode 100644 index 72576e3..0000000 --- a/0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch +++ /dev/null @@ -1,1748 +0,0 @@ -From 0ad41f11bea5c303ff39c54cae8e46afdfae6070 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:29 +0000 -Subject: [PATCH 113/157] [Backport][SME] aarch64: Add support for - __arm_locally_streaming - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3f6e5991fab507aa79121dc44d1afcd622c78744 - -This patch adds support for the __arm_locally_streaming attribute, -which allows a function to use SME internally without changing -the function's ABI. The attribute is valid but redundant for -__arm_streaming functions. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_arm_attribute_table): Add - arm::locally_streaming. - (aarch64_fndecl_is_locally_streaming): New function. - (aarch64_fndecl_sm_state): Handle locally-streaming functions. - (aarch64_cfun_enables_pstate_sm): New function. - (aarch64_add_offset): Add an argument that specifies whether - the streaming vector length should be used instead of the - prevailing one. - (aarch64_split_add_offset, aarch64_add_sp, aarch64_sub_sp): Likewise. - (aarch64_allocate_and_probe_stack_space): Likewise. - (aarch64_expand_mov_immediate): Update calls accordingly. - (aarch64_need_old_pstate_sm): Return true for locally-streaming - streaming-compatible functions. 
- (aarch64_layout_frame): Force all call-preserved Z and P registers - to be saved and restored if the function switches PSTATE.SM in the - prologue. - (aarch64_get_separate_components): Disable shrink-wrapping of - such Z and P saves and restores. - (aarch64_use_late_prologue_epilogue): New function. - (aarch64_expand_prologue): Measure SVE lengths in the streaming - vector length for locally-streaming functions, then emit code - to enable streaming mode. - (aarch64_expand_epilogue): Likewise in reverse. - (TARGET_USE_LATE_PROLOGUE_EPILOGUE): Define. - * config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros): - Define __arm_locally_streaming. - -gcc/testsuite/ - * gcc.target/aarch64/sme/locally_streaming_1.c: New test. - * gcc.target/aarch64/sme/locally_streaming_2.c: Likewise. - * gcc.target/aarch64/sme/locally_streaming_3.c: Likewise. - * gcc.target/aarch64/sme/locally_streaming_4.c: Likewise. - * gcc.target/aarch64/sme/keyword_macros_1.c: Add - __arm_locally_streaming. - * g++.target/aarch64/sme/keyword_macros_1.C: Likewise. 
---- - gcc/config/aarch64/aarch64-c.cc | 1 + - gcc/config/aarch64/aarch64.cc | 233 +++++++-- - .../g++.target/aarch64/sme/keyword_macros_1.C | 1 + - .../gcc.target/aarch64/sme/keyword_macros_1.c | 1 + - .../aarch64/sme/locally_streaming_1.c | 466 ++++++++++++++++++ - .../aarch64/sme/locally_streaming_2.c | 177 +++++++ - .../aarch64/sme/locally_streaming_3.c | 273 ++++++++++ - .../aarch64/sme/locally_streaming_4.c | 145 ++++++ - 8 files changed, 1259 insertions(+), 38 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index cb8a6c2fc..745719d8b 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -86,6 +86,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - - DEFINE_ARM_KEYWORD_MACRO ("streaming"); - DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible"); -+ DEFINE_ARM_KEYWORD_MACRO ("locally_streaming"); - - #undef DEFINE_ARM_KEYWORD_MACRO - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 113784e31..4cb43c2e2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -3283,6 +3283,7 @@ static const attribute_spec aarch64_arm_attributes[] = - NULL, attr_streaming_exclusions }, - { "streaming_compatible", 0, 0, false, true, true, true, - NULL, attr_streaming_exclusions }, -+ { "locally_streaming", 0, 0, true, false, false, false, NULL, NULL }, - { "new", 1, -1, true, false, false, false, - handle_arm_new, NULL }, - { "preserves", 1, -1, false, true, true, true, -@@ -4657,6 +4658,16 @@ aarch64_fntype_isa_mode (const_tree fntype) - | aarch64_fntype_pstate_za (fntype)); - } - -+/* Return true if FNDECL uses 
streaming mode internally, as an -+ implementation choice. */ -+ -+static bool -+aarch64_fndecl_is_locally_streaming (const_tree fndecl) -+{ -+ return lookup_attribute ("arm", "locally_streaming", -+ DECL_ATTRIBUTES (fndecl)); -+} -+ - /* Return the state of PSTATE.SM when compiling the body of - function FNDECL. This might be different from the state of - PSTATE.SM on entry. */ -@@ -4664,6 +4675,9 @@ aarch64_fntype_isa_mode (const_tree fntype) - static aarch64_feature_flags - aarch64_fndecl_pstate_sm (const_tree fndecl) - { -+ if (aarch64_fndecl_is_locally_streaming (fndecl)) -+ return AARCH64_FL_SM_ON; -+ - return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl)); - } - -@@ -4739,6 +4753,16 @@ aarch64_cfun_has_new_state (const char *state_name) - return aarch64_fndecl_has_new_state (cfun->decl, state_name); - } - -+/* Return true if PSTATE.SM is 1 in the body of the current function, -+ but is not guaranteed to be 1 on entry. */ -+ -+static bool -+aarch64_cfun_enables_pstate_sm () -+{ -+ return (aarch64_fndecl_is_locally_streaming (cfun->decl) -+ && aarch64_cfun_incoming_pstate_sm () != AARCH64_FL_SM_ON); -+} -+ - /* Return true if the current function has state STATE_NAME, either by - creating new state itself or by sharing state with callers. */ - -@@ -6931,6 +6955,10 @@ aarch64_add_offset_temporaries (rtx x) - TEMP2, if nonnull, is a second temporary register that doesn't - overlap either DEST or REG. - -+ FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of OFFSET -+ is measured relative to the SME vector length instead of the current -+ prevailing vector length. It is 0 otherwise. 
-+ - Since this function may be used to adjust the stack pointer, we must - ensure that it cannot cause transient stack deallocation (for example - by first incrementing SP and then decrementing when adjusting by a -@@ -6939,6 +6967,7 @@ aarch64_add_offset_temporaries (rtx x) - static void - aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - poly_int64 offset, rtx temp1, rtx temp2, -+ aarch64_feature_flags force_isa_mode, - bool frame_related_p, bool emit_move_imm = true) - { - gcc_assert (emit_move_imm || temp1 != NULL_RTX); -@@ -6951,9 +6980,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - /* Try using ADDVL or ADDPL to add the whole value. */ - if (src != const0_rtx && aarch64_sve_addvl_addpl_immediate_p (offset)) - { -- rtx offset_rtx = gen_int_mode (offset, mode); -+ gcc_assert (offset.coeffs[0] == offset.coeffs[1]); -+ rtx offset_rtx; -+ if (force_isa_mode == 0) -+ offset_rtx = gen_int_mode (offset, mode); -+ else -+ offset_rtx = aarch64_sme_vq_immediate (mode, offset.coeffs[0], 0); - rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx)); - RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ if (frame_related_p && (force_isa_mode & AARCH64_FL_SM_ON)) -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, -+ gen_rtx_SET (dest, plus_constant (Pmode, src, -+ offset))); - return; - } - -@@ -6969,11 +7007,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - if (src != const0_rtx - && aarch64_sve_addvl_addpl_immediate_p (poly_offset)) - { -- rtx offset_rtx = gen_int_mode (poly_offset, mode); -+ rtx offset_rtx; -+ if (force_isa_mode == 0) -+ offset_rtx = gen_int_mode (poly_offset, mode); -+ else -+ offset_rtx = aarch64_sme_vq_immediate (mode, factor, 0); - if (frame_related_p) - { - rtx_insn *insn = emit_insn (gen_add3_insn (dest, src, offset_rtx)); - RTX_FRAME_RELATED_P (insn) = true; -+ if (force_isa_mode & AARCH64_FL_SM_ON) -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, -+ gen_rtx_SET (dest, plus_constant (Pmode, src, -+ 
poly_offset))); - src = dest; - } - else -@@ -7004,9 +7050,19 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - rtx val; - if (IN_RANGE (rel_factor, -32, 31)) - { -+ if (force_isa_mode & AARCH64_FL_SM_ON) -+ { -+ /* Try to use an unshifted RDSVL, otherwise fall back on -+ a shifted RDSVL #1. */ -+ if (aarch64_sve_rdvl_addvl_factor_p (factor)) -+ shift = 0; -+ else -+ factor = rel_factor * 16; -+ val = aarch64_sme_vq_immediate (mode, factor, 0); -+ } - /* Try to use an unshifted CNT[BHWD] or RDVL. */ -- if (aarch64_sve_cnt_factor_p (factor) -- || aarch64_sve_rdvl_addvl_factor_p (factor)) -+ else if (aarch64_sve_cnt_factor_p (factor) -+ || aarch64_sve_rdvl_addvl_factor_p (factor)) - { - val = gen_int_mode (poly_int64 (factor, factor), mode); - shift = 0; -@@ -7036,11 +7092,18 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - a shift and add sequence for the multiplication. - If CNTB << SHIFT is out of range, stick with the current - shift factor. */ -- if (IN_RANGE (low_bit, 2, 16 * 16)) -+ if (force_isa_mode == 0 -+ && IN_RANGE (low_bit, 2, 16 * 16)) - { - val = gen_int_mode (poly_int64 (low_bit, low_bit), mode); - shift = 0; - } -+ else if ((force_isa_mode & AARCH64_FL_SM_ON) -+ && aarch64_sve_rdvl_addvl_factor_p (low_bit)) -+ { -+ val = aarch64_sme_vq_immediate (mode, low_bit, 0); -+ shift = 0; -+ } - else - val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode); - -@@ -7128,30 +7191,34 @@ aarch64_split_add_offset (scalar_int_mode mode, rtx dest, rtx src, - rtx offset_rtx, rtx temp1, rtx temp2) - { - aarch64_add_offset (mode, dest, src, rtx_to_poly_int64 (offset_rtx), -- temp1, temp2, false); -+ temp1, temp2, 0, false); - } - - /* Add DELTA to the stack pointer, marking the instructions frame-related. -- TEMP1 is available as a temporary if nonnull. EMIT_MOVE_IMM is false -- if TEMP1 already contains abs (DELTA). */ -+ TEMP1 is available as a temporary if nonnull. FORCE_ISA_MODE is as -+ for aarch64_add_offset. 
EMIT_MOVE_IMM is false if TEMP1 already -+ contains abs (DELTA). */ - - static inline void --aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, bool emit_move_imm) -+aarch64_add_sp (rtx temp1, rtx temp2, poly_int64 delta, -+ aarch64_feature_flags force_isa_mode, bool emit_move_imm) - { - aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, delta, -- temp1, temp2, true, emit_move_imm); -+ temp1, temp2, force_isa_mode, true, emit_move_imm); - } - - /* Subtract DELTA from the stack pointer, marking the instructions -- frame-related if FRAME_RELATED_P. TEMP1 is available as a temporary -- if nonnull. */ -+ frame-related if FRAME_RELATED_P. FORCE_ISA_MODE is as for -+ aarch64_add_offset. TEMP1 is available as a temporary if nonnull. */ - - static inline void --aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, bool frame_related_p, -- bool emit_move_imm = true) -+aarch64_sub_sp (rtx temp1, rtx temp2, poly_int64 delta, -+ aarch64_feature_flags force_isa_mode, -+ bool frame_related_p, bool emit_move_imm = true) - { - aarch64_add_offset (Pmode, stack_pointer_rtx, stack_pointer_rtx, -delta, -- temp1, temp2, frame_related_p, emit_move_imm); -+ temp1, temp2, force_isa_mode, frame_related_p, -+ emit_move_imm); - } - - /* A streaming-compatible function needs to switch temporarily to the known -@@ -8176,11 +8243,11 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - { - base = aarch64_force_temporary (int_mode, dest, base); - aarch64_add_offset (int_mode, dest, base, offset, -- NULL_RTX, NULL_RTX, false); -+ NULL_RTX, NULL_RTX, 0, false); - } - else - aarch64_add_offset (int_mode, dest, base, offset, -- dest, NULL_RTX, false); -+ dest, NULL_RTX, 0, false); - } - return; - } -@@ -8207,7 +8274,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - gcc_assert (can_create_pseudo_p ()); - base = aarch64_force_temporary (int_mode, dest, base); - aarch64_add_offset (int_mode, dest, base, const_offset, -- NULL_RTX, NULL_RTX, false); -+ NULL_RTX, NULL_RTX, 0, 
false); - return; - } - -@@ -8247,7 +8314,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - gcc_assert(can_create_pseudo_p ()); - base = aarch64_force_temporary (int_mode, dest, base); - aarch64_add_offset (int_mode, dest, base, const_offset, -- NULL_RTX, NULL_RTX, false); -+ NULL_RTX, NULL_RTX, 0, false); - return; - } - /* FALLTHRU */ -@@ -9755,6 +9822,9 @@ aarch64_need_old_pstate_sm () - if (aarch64_cfun_incoming_pstate_sm () != 0) - return false; - -+ if (aarch64_cfun_enables_pstate_sm ()) -+ return true; -+ - if (cfun->machine->call_switches_pstate_sm) - for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (auto *call = dyn_cast (insn)) -@@ -9781,6 +9851,7 @@ aarch64_layout_frame (void) - bool frame_related_fp_reg_p = false; - aarch64_frame &frame = cfun->machine->frame; - poly_int64 top_of_locals = -1; -+ bool enables_pstate_sm = aarch64_cfun_enables_pstate_sm (); - - vec_safe_truncate (frame.saved_gprs, 0); - vec_safe_truncate (frame.saved_fprs, 0); -@@ -9818,7 +9889,7 @@ aarch64_layout_frame (void) - frame.reg_offset[regno] = SLOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- if (df_regs_ever_live_p (regno) -+ if ((enables_pstate_sm || df_regs_ever_live_p (regno)) - && !fixed_regs[regno] - && !crtl->abi->clobbers_full_reg_p (regno)) - { -@@ -9847,7 +9918,7 @@ aarch64_layout_frame (void) - } - - for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) -- if (df_regs_ever_live_p (regno) -+ if ((enables_pstate_sm || df_regs_ever_live_p (regno)) - && !fixed_regs[regno] - && !crtl->abi->clobbers_full_reg_p (regno)) - frame.reg_offset[regno] = SLOT_REQUIRED; -@@ -9964,7 +10035,8 @@ aarch64_layout_frame (void) - /* If the current function changes the SVE vector length, ensure that the - old value of the DWARF VG register is saved and available in the CFI, - so that outer frames with VL-sized offsets can be processed correctly. 
*/ -- if (cfun->machine->call_switches_pstate_sm) -+ if (cfun->machine->call_switches_pstate_sm -+ || aarch64_cfun_enables_pstate_sm ()) - { - frame.reg_offset[VG_REGNUM] = offset; - offset += UNITS_PER_WORD; -@@ -10749,9 +10821,16 @@ aarch64_get_separate_components (void) - bitmap_clear (components); - - /* The registers we need saved to the frame. */ -+ bool enables_pstate_sm = aarch64_cfun_enables_pstate_sm (); - for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) - if (aarch64_register_saved_on_entry (regno)) - { -+ /* Disallow shrink wrapping for registers that will be clobbered -+ by an SMSTART SM in the prologue. */ -+ if (enables_pstate_sm -+ && (FP_REGNUM_P (regno) || PR_REGNUM_P (regno))) -+ continue; -+ - /* Punt on saves and restores that use ST1D and LD1D. We could - try to be smarter, but it would involve making sure that the - spare predicate register itself is safe to use at the save -@@ -11070,11 +11149,16 @@ aarch64_emit_stack_tie (rtx reg) - events, e.g. if we were to allow the stack to be dropped by more than a page - and then have multiple probes up and we take a signal somewhere in between - then the signal handler doesn't know the state of the stack and can make no -- assumptions about which pages have been probed. */ -+ assumptions about which pages have been probed. -+ -+ FORCE_ISA_MODE is AARCH64_FL_SM_ON if any variable component of POLY_SIZE -+ is measured relative to the SME vector length instead of the current -+ prevailing vector length. It is 0 otherwise. 
*/ - - static void - aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - poly_int64 poly_size, -+ aarch64_feature_flags force_isa_mode, - bool frame_related_p, - bool final_adjustment_p) - { -@@ -11116,7 +11200,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (known_lt (poly_size, min_probe_threshold) - || !flag_stack_clash_protection) - { -- aarch64_sub_sp (temp1, temp2, poly_size, frame_related_p); -+ aarch64_sub_sp (temp1, temp2, poly_size, force_isa_mode, -+ frame_related_p); - return; - } - -@@ -11133,7 +11218,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - - /* First calculate the amount of bytes we're actually spilling. */ - aarch64_add_offset (Pmode, temp1, CONST0_RTX (Pmode), -- poly_size, temp1, temp2, false, true); -+ poly_size, temp1, temp2, force_isa_mode, -+ false, true); - - rtx_insn *insn = get_last_insn (); - -@@ -11191,7 +11277,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - { - for (HOST_WIDE_INT i = 0; i < rounded_size; i += guard_size) - { -- aarch64_sub_sp (NULL, temp2, guard_size, true); -+ aarch64_sub_sp (NULL, temp2, guard_size, force_isa_mode, true); - emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, - guard_used_by_caller)); - emit_insn (gen_blockage ()); -@@ -11202,7 +11288,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - { - /* Compute the ending address. 
*/ - aarch64_add_offset (Pmode, temp1, stack_pointer_rtx, -rounded_size, -- temp1, NULL, false, true); -+ temp1, NULL, force_isa_mode, false, true); - rtx_insn *insn = get_last_insn (); - - /* For the initial allocation, we don't have a frame pointer -@@ -11268,7 +11354,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (final_adjustment_p && rounded_size != 0) - min_probe_threshold = 0; - -- aarch64_sub_sp (temp1, temp2, residual, frame_related_p); -+ aarch64_sub_sp (temp1, temp2, residual, force_isa_mode, frame_related_p); - if (residual >= min_probe_threshold) - { - if (dump_file) -@@ -11333,6 +11419,14 @@ aarch64_epilogue_uses (int regno) - return 0; - } - -+/* Implement TARGET_USE_LATE_PROLOGUE_EPILOGUE. */ -+ -+static bool -+aarch64_use_late_prologue_epilogue () -+{ -+ return aarch64_cfun_enables_pstate_sm (); -+} -+ - /* The current function's frame has a save slot for the incoming state - of SVCR. Return a legitimate memory for the slot, based on the hard - frame pointer. */ -@@ -11469,6 +11563,9 @@ aarch64_expand_prologue (void) - unsigned reg2 = frame.wb_push_candidate2; - bool emit_frame_chain = frame.emit_frame_chain; - rtx_insn *insn; -+ aarch64_feature_flags force_isa_mode = 0; -+ if (aarch64_cfun_enables_pstate_sm ()) -+ force_isa_mode = AARCH64_FL_SM_ON; - - if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) - { -@@ -11530,7 +11627,7 @@ aarch64_expand_prologue (void) - less the amount of the guard reserved for use by the caller's - outgoing args. 
*/ - aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust, -- true, false); -+ force_isa_mode, true, false); - - if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); -@@ -11553,7 +11650,8 @@ aarch64_expand_prologue (void) - gcc_assert (known_eq (chain_offset, 0)); - aarch64_add_offset (Pmode, hard_frame_pointer_rtx, - stack_pointer_rtx, chain_offset, -- tmp1_rtx, tmp0_rtx, frame_pointer_needed); -+ tmp1_rtx, tmp0_rtx, force_isa_mode, -+ frame_pointer_needed); - if (frame_pointer_needed && !frame_size.is_constant ()) - { - /* Variable-sized frames need to describe the save slot -@@ -11600,6 +11698,7 @@ aarch64_expand_prologue (void) - || known_eq (initial_adjust, 0)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, - sve_callee_adjust, -+ force_isa_mode, - !frame_pointer_needed, false); - bytes_below_sp -= sve_callee_adjust; - } -@@ -11612,12 +11711,15 @@ aarch64_expand_prologue (void) - that is assumed by the called. */ - gcc_assert (known_eq (bytes_below_sp, final_adjust)); - aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust, -+ force_isa_mode, - !frame_pointer_needed, true); - if (emit_frame_chain && maybe_ne (final_adjust, 0)) - aarch64_emit_stack_tie (hard_frame_pointer_rtx); - -- /* Save the incoming value of PSTATE.SM, if required. */ -- if (known_ge (frame.old_svcr_offset, 0)) -+ /* Save the incoming value of PSTATE.SM, if required. Code further -+ down does this for locally-streaming functions. */ -+ if (known_ge (frame.old_svcr_offset, 0) -+ && !aarch64_cfun_enables_pstate_sm ()) - { - rtx mem = aarch64_old_svcr_mem (); - MEM_VOLATILE_P (mem) = 1; -@@ -11649,6 +11751,34 @@ aarch64_expand_prologue (void) - emit_move_insn (gen_rtx_REG (DImode, R1_REGNUM), old_r1); - } - } -+ -+ /* Enable PSTATE.SM, if required. 
*/ -+ if (aarch64_cfun_enables_pstate_sm ()) -+ { -+ rtx_insn *guard_label = nullptr; -+ if (known_ge (cfun->machine->frame.old_svcr_offset, 0)) -+ { -+ /* The current function is streaming-compatible. Save the -+ original state of PSTATE.SM. */ -+ rtx svcr = gen_rtx_REG (DImode, IP0_REGNUM); -+ emit_insn (gen_aarch64_read_svcr (svcr)); -+ emit_move_insn (aarch64_old_svcr_mem (), svcr); -+ guard_label = aarch64_guard_switch_pstate_sm (svcr, -+ aarch64_isa_flags); -+ } -+ aarch64_sme_mode_switch_regs args_switch; -+ auto &args = crtl->args.info; -+ for (unsigned int i = 0; i < args.num_sme_mode_switch_args; ++i) -+ { -+ rtx x = args.sme_mode_switch_args[i]; -+ args_switch.add_reg (GET_MODE (x), REGNO (x)); -+ } -+ args_switch.emit_prologue (); -+ emit_insn (gen_aarch64_smstart_sm ()); -+ args_switch.emit_epilogue (); -+ if (guard_label) -+ emit_label (guard_label); -+ } - } - - /* Return TRUE if we can use a simple_return insn. -@@ -11695,6 +11825,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - HOST_WIDE_INT guard_size - = 1 << param_stack_clash_protection_guard_size; - HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; -+ aarch64_feature_flags force_isa_mode = 0; -+ if (aarch64_cfun_enables_pstate_sm ()) -+ force_isa_mode = AARCH64_FL_SM_ON; - - /* We can re-use the registers when: - -@@ -11719,6 +11852,24 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - = maybe_ne (get_frame_size () - + frame.saved_varargs_size, 0); - -+ /* Reset PSTATE.SM, if required. 
*/ -+ if (aarch64_cfun_enables_pstate_sm ()) -+ { -+ rtx_insn *guard_label = nullptr; -+ if (known_ge (cfun->machine->frame.old_svcr_offset, 0)) -+ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, -+ aarch64_isa_flags); -+ aarch64_sme_mode_switch_regs return_switch; -+ if (crtl->return_rtx && REG_P (crtl->return_rtx)) -+ return_switch.add_reg (GET_MODE (crtl->return_rtx), -+ REGNO (crtl->return_rtx)); -+ return_switch.emit_prologue (); -+ emit_insn (gen_aarch64_smstop_sm ()); -+ return_switch.emit_epilogue (); -+ if (guard_label) -+ emit_label (guard_label); -+ } -+ - /* Emit a barrier to prevent loads from a deallocated stack. */ - if (maybe_gt (final_adjust, crtl->outgoing_args_size) - || cfun->calls_alloca -@@ -11739,19 +11890,21 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - aarch64_add_offset (Pmode, stack_pointer_rtx, - hard_frame_pointer_rtx, - -bytes_below_hard_fp + final_adjust, -- tmp1_rtx, tmp0_rtx, callee_adjust == 0); -+ tmp1_rtx, tmp0_rtx, force_isa_mode, -+ callee_adjust == 0); - else - /* The case where we need to re-use the register here is very rare, so - avoid the complicated condition and just always emit a move if the - immediate doesn't fit. */ -- aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); -+ aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, force_isa_mode, true); - - /* Restore the vector registers before the predicate registers, - so that we can use P4 as a temporary for big-endian SVE frames. 
*/ - aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops); - aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops); - if (maybe_ne (sve_callee_adjust, 0)) -- aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -+ aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, -+ force_isa_mode, true); - - /* When shadow call stack is enabled, the scs_pop in the epilogue will - restore x30, we don't need to restore x30 again in the traditional -@@ -11781,7 +11934,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - - /* Liveness of EP0_REGNUM can not be trusted across function calls either, so - add restriction on emit_move optimization to leaf functions. */ -- aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, -+ aarch64_add_sp (tmp0_rtx, tmp1_rtx, initial_adjust, force_isa_mode, - (!can_inherit_p || !crtl->is_leaf - || df_regs_ever_live_p (EP0_REGNUM))); - -@@ -11914,7 +12067,8 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - temp1 = gen_rtx_REG (Pmode, EP1_REGNUM); - - if (vcall_offset == 0) -- aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, false); -+ aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, temp1, temp0, -+ 0, false); - else - { - gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); -@@ -11927,7 +12081,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - plus_constant (Pmode, this_rtx, delta)); - else - aarch64_add_offset (Pmode, this_rtx, this_rtx, delta, -- temp1, temp0, false); -+ temp1, temp0, 0, false); - } - - if (Pmode == ptr_mode) -@@ -30962,6 +31116,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_EXTRA_LIVE_ON_ENTRY - #define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry - -+#undef TARGET_USE_LATE_PROLOGUE_EPILOGUE -+#define TARGET_USE_LATE_PROLOGUE_EPILOGUE aarch64_use_late_prologue_epilogue -+ - #undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL - #define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue - -diff 
--git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -index 8b0755014..dc5c097bd 100644 ---- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -+++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C -@@ -7,3 +7,4 @@ void f4 () __arm_out("za"); - void f5 () __arm_inout("za"); - void f6 () __arm_preserves("za"); - __arm_new("za") void f7 () {} -+__arm_locally_streaming void f8 () {} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -index fcabe3edc..22f5facfd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c -@@ -7,3 +7,4 @@ void f4 () __arm_out("za"); - void f5 () __arm_inout("za"); - void f6 () __arm_preserves("za"); - __arm_new("za") void f7 () {} -+__arm_locally_streaming void f8 () {} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c -new file mode 100644 -index 000000000..20ff4b87d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c -@@ -0,0 +1,466 @@ -+// { dg-options "-O -fomit-frame-pointer" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void consume_za () [[arm::streaming, arm::inout("za")]]; -+ -+/* -+** n_ls: -+** sub sp, sp, #?80 -+** cntd x16 -+** str x16, \[sp\] -+** stp d8, d9, \[sp, #?16\] -+** stp d10, d11, \[sp, #?32\] -+** stp d12, d13, \[sp, #?48\] -+** stp d14, d15, \[sp, #?64\] -+** smstart sm -+** smstop sm -+** ldp d8, d9, \[sp, #?16\] -+** ldp d10, d11, \[sp, #?32\] -+** ldp d12, d13, \[sp, #?48\] -+** ldp d14, d15, \[sp, #?64\] -+** add sp, sp, #?80 -+** ret -+*/ -+[[arm::locally_streaming]] void -+n_ls () -+{ -+ asm (""); -+} -+ -+/* -+** s_ls: -+** ret -+*/ -+[[arm::locally_streaming]] void -+s_ls () [[arm::streaming]] -+{ -+ asm (""); -+} -+ -+/* -+** sc_ls: -+** stp x29, 
x30, \[sp, #?-96\]! -+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstart sm -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstop sm -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x29, x30, \[sp\], #?96 -+** ret -+*/ -+[[arm::locally_streaming]] void -+sc_ls () [[arm::streaming_compatible]] -+{ -+ asm (""); -+} -+ -+/* -+** n_ls_new_za: -+** str x30, \[sp, #?-80\]! -+** cntd x16 -+** str x16, \[sp, #?8\] -+** stp d8, d9, \[sp, #?16\] -+** stp d10, d11, \[sp, #?32\] -+** stp d12, d13, \[sp, #?48\] -+** stp d14, d15, \[sp, #?64\] -+** smstart sm -+** mrs (x[0-9]+), tpidr2_el0 -+** cbz \1, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** zero { za } -+** smstart za -+** bl consume_za -+** smstop za -+** smstop sm -+** ldp d8, d9, \[sp, #?16\] -+** ldp d10, d11, \[sp, #?32\] -+** ldp d12, d13, \[sp, #?48\] -+** ldp d14, d15, \[sp, #?64\] -+** ldr x30, \[sp\], #?80 -+** ret -+*/ -+[[arm::locally_streaming, arm::new("za")]] void -+n_ls_new_za () -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** s_ls_new_za: -+** str x30, \[sp, #?-16\]! -+** mrs (x[0-9]+), tpidr2_el0 -+** cbz \1, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** zero { za } -+** smstart za -+** bl consume_za -+** smstop za -+** ldr x30, \[sp\], #?16 -+** ret -+*/ -+[[arm::locally_streaming, arm::new("za")]] void -+s_ls_new_za () [[arm::streaming]] -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** sc_ls_new_za: -+** stp x29, x30, \[sp, #?-96\]! 
-+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstart sm -+** mrs (x[0-9]+), tpidr2_el0 -+** cbz \1, [^\n]+ -+** bl __arm_tpidr2_save -+** msr tpidr2_el0, xzr -+** zero { za } -+** smstart za -+** bl consume_za -+** smstop za -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstop sm -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x29, x30, \[sp\], #?96 -+** ret -+*/ -+[[arm::locally_streaming, arm::new("za")]] void -+sc_ls_new_za () [[arm::streaming_compatible]] -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** n_ls_shared_za: -+** str x30, \[sp, #?-80\]! -+** cntd x16 -+** str x16, \[sp, #?8\] -+** stp d8, d9, \[sp, #?16\] -+** stp d10, d11, \[sp, #?32\] -+** stp d12, d13, \[sp, #?48\] -+** stp d14, d15, \[sp, #?64\] -+** smstart sm -+** bl consume_za -+** smstop sm -+** ldp d8, d9, \[sp, #?16\] -+** ldp d10, d11, \[sp, #?32\] -+** ldp d12, d13, \[sp, #?48\] -+** ldp d14, d15, \[sp, #?64\] -+** ldr x30, \[sp\], #?80 -+** ret -+*/ -+[[arm::locally_streaming]] void -+n_ls_shared_za () [[arm::inout("za")]] -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** s_ls_shared_za: -+** str x30, \[sp, #?-16\]! -+** bl consume_za -+** ldr x30, \[sp\], #?16 -+** ret -+*/ -+[[arm::locally_streaming]] void -+s_ls_shared_za () [[arm::streaming, arm::inout("za")]] -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** sc_ls_shared_za: -+** stp x29, x30, \[sp, #?-96\]! 
-+** mov x29, sp -+** cntd x16 -+** str x16, \[sp, #?24\] -+** stp d8, d9, \[sp, #?32\] -+** stp d10, d11, \[sp, #?48\] -+** stp d12, d13, \[sp, #?64\] -+** stp d14, d15, \[sp, #?80\] -+** mrs x16, svcr -+** str x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstart sm -+** bl consume_za -+** ldr x16, \[x29, #?16\] -+** tbnz x16, 0, [^\n]+ -+** smstop sm -+** ldp d8, d9, \[sp, #?32\] -+** ldp d10, d11, \[sp, #?48\] -+** ldp d12, d13, \[sp, #?64\] -+** ldp d14, d15, \[sp, #?80\] -+** ldp x29, x30, \[sp\], #?96 -+** ret -+*/ -+[[arm::locally_streaming]] void -+sc_ls_shared_za () [[arm::streaming_compatible, arm::inout("za")]] -+{ -+ consume_za (); -+ asm (""); -+} -+ -+/* -+** n_ls_vector_pcs: -+** sub sp, sp, #?272 -+** cntd x16 -+** str x16, \[sp\] -+** stp q8, q9, \[sp, #?16\] -+** stp q10, q11, \[sp, #?48\] -+** stp q12, q13, \[sp, #?80\] -+** stp q14, q15, \[sp, #?112\] -+** stp q16, q17, \[sp, #?144\] -+** stp q18, q19, \[sp, #?176\] -+** stp q20, q21, \[sp, #?208\] -+** stp q22, q23, \[sp, #?240\] -+** smstart sm -+** smstop sm -+** ldp q8, q9, \[sp, #?16\] -+** ldp q10, q11, \[sp, #?48\] -+** ldp q12, q13, \[sp, #?80\] -+** ldp q14, q15, \[sp, #?112\] -+** ldp q16, q17, \[sp, #?144\] -+** ldp q18, q19, \[sp, #?176\] -+** ldp q20, q21, \[sp, #?208\] -+** ldp q22, q23, \[sp, #?240\] -+** add sp, sp, #?272 -+** ret -+*/ -+[[arm::locally_streaming]] void __attribute__((aarch64_vector_pcs)) -+n_ls_vector_pcs () -+{ -+ asm (""); -+} -+ -+/* -+** n_ls_sve_pcs: -+** sub sp, sp, #?16 -+** cntd x16 -+** str x16, \[sp\] -+** addsvl sp, sp, #-18 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str p12, \[sp, #8, mul vl\] -+** str p13, \[sp, #9, mul vl\] -+** str p14, \[sp, #10, mul vl\] -+** str p15, \[sp, #11, mul vl\] -+** str z8, \[sp, #2, mul vl\] -+** str z9, \[sp, 
#3, mul vl\] -+** str z10, \[sp, #4, mul vl\] -+** str z11, \[sp, #5, mul vl\] -+** str z12, \[sp, #6, mul vl\] -+** str z13, \[sp, #7, mul vl\] -+** str z14, \[sp, #8, mul vl\] -+** str z15, \[sp, #9, mul vl\] -+** str z16, \[sp, #10, mul vl\] -+** str z17, \[sp, #11, mul vl\] -+** str z18, \[sp, #12, mul vl\] -+** str z19, \[sp, #13, mul vl\] -+** str z20, \[sp, #14, mul vl\] -+** str z21, \[sp, #15, mul vl\] -+** str z22, \[sp, #16, mul vl\] -+** str z23, \[sp, #17, mul vl\] -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** smstop sm -+** ldr z8, \[sp, #2, mul vl\] -+** ldr z9, \[sp, #3, mul vl\] -+** ldr z10, \[sp, #4, mul vl\] -+** ldr z11, \[sp, #5, mul vl\] -+** ldr z12, \[sp, #6, mul vl\] -+** ldr z13, \[sp, #7, mul vl\] -+** ldr z14, \[sp, #8, mul vl\] -+** ldr z15, \[sp, #9, mul vl\] -+** ldr z16, \[sp, #10, mul vl\] -+** ldr z17, \[sp, #11, mul vl\] -+** ldr z18, \[sp, #12, mul vl\] -+** ldr z19, \[sp, #13, mul vl\] -+** ldr z20, \[sp, #14, mul vl\] -+** ldr z21, \[sp, #15, mul vl\] -+** ldr z22, \[sp, #16, mul vl\] -+** ldr z23, \[sp, #17, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** ldr p12, \[sp, #8, mul vl\] -+** ldr p13, \[sp, #9, mul vl\] -+** ldr p14, \[sp, #10, mul vl\] -+** ldr p15, \[sp, #11, mul vl\] -+** addsvl sp, sp, #18 -+** add sp, sp, #?16 -+** ret -+*/ -+[[arm::locally_streaming]] void -+n_ls_sve_pcs (__SVBool_t x) -+{ -+ asm (""); -+} -+ -+/* -+** n_ls_v0: -+** addsvl sp, sp, #-1 -+** ... -+** smstart sm -+** add x[0-9]+, [^\n]+ -+** smstop sm -+** ... -+** addsvl sp, sp, #1 -+** ... -+*/ -+#define TEST(VN) __SVInt32_t VN; asm ("" :: "r" (&VN)); -+[[arm::locally_streaming]] void -+n_ls_v0 () -+{ -+ TEST (v0); -+} -+ -+/* -+** n_ls_v32: -+** addsvl sp, sp, #-32 -+** ... 
-+** smstart sm -+** ... -+** smstop sm -+** ... -+** rdsvl (x[0-9]+), #1 -+** lsl (x[0-9]+), \1, #?5 -+** add sp, sp, \2 -+** ... -+*/ -+[[arm::locally_streaming]] void -+n_ls_v32 () -+{ -+ TEST (v0); -+ TEST (v1); -+ TEST (v2); -+ TEST (v3); -+ TEST (v4); -+ TEST (v5); -+ TEST (v6); -+ TEST (v7); -+ TEST (v8); -+ TEST (v9); -+ TEST (v10); -+ TEST (v11); -+ TEST (v12); -+ TEST (v13); -+ TEST (v14); -+ TEST (v15); -+ TEST (v16); -+ TEST (v17); -+ TEST (v18); -+ TEST (v19); -+ TEST (v20); -+ TEST (v21); -+ TEST (v22); -+ TEST (v23); -+ TEST (v24); -+ TEST (v25); -+ TEST (v26); -+ TEST (v27); -+ TEST (v28); -+ TEST (v29); -+ TEST (v30); -+ TEST (v31); -+} -+ -+/* -+** n_ls_v33: -+** rdsvl (x[0-9]+), #1 -+** mov (x[0-9]+), #?33 -+** mul (x[0-9]+), (?:\1, \2|\2, \1) -+** sub sp, sp, \3 -+** ... -+** smstart sm -+** ... -+** smstop sm -+** ... -+** rdsvl (x[0-9]+), #1 -+** mov (x[0-9]+), #?33 -+** mul (x[0-9]+), (?:\4, \5|\5, \4) -+** add sp, sp, \6 -+** ... -+*/ -+[[arm::locally_streaming]] void -+n_ls_v33 () -+{ -+ TEST (v0); -+ TEST (v1); -+ TEST (v2); -+ TEST (v3); -+ TEST (v4); -+ TEST (v5); -+ TEST (v6); -+ TEST (v7); -+ TEST (v8); -+ TEST (v9); -+ TEST (v10); -+ TEST (v11); -+ TEST (v12); -+ TEST (v13); -+ TEST (v14); -+ TEST (v15); -+ TEST (v16); -+ TEST (v17); -+ TEST (v18); -+ TEST (v19); -+ TEST (v20); -+ TEST (v21); -+ TEST (v22); -+ TEST (v23); -+ TEST (v24); -+ TEST (v25); -+ TEST (v26); -+ TEST (v27); -+ TEST (v28); -+ TEST (v29); -+ TEST (v30); -+ TEST (v31); -+ TEST (v32); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c -new file mode 100644 -index 000000000..0eba99385 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_2.c -@@ -0,0 +1,177 @@ -+// { dg-options "-O -fomit-frame-pointer" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+#include -+ -+/* -+** test_d0: -+** ... -+** smstart sm -+** ... 
-+** fmov x10, d0 -+** smstop sm -+** fmov d0, x10 -+** ... -+*/ -+[[arm::locally_streaming]] double -+test_d0 () -+{ -+ asm (""); -+ return 1.0f; -+} -+ -+/* -+** test_d0_vec: -+** ... -+** smstart sm -+** ... -+** ( -+** fmov x10, d0 -+** | -+** umov x10, v0.d\[0\] -+** ) -+** smstop sm -+** fmov d0, x10 -+** ... -+*/ -+[[arm::locally_streaming]] int8x8_t -+test_d0_vec () -+{ -+ asm (""); -+ return (int8x8_t) {}; -+} -+ -+/* -+** test_q0: -+** ... -+** smstart sm -+** ... -+** str q0, \[sp, #?-16\]! -+** smstop sm -+** ldr q0, \[sp\], #?16 -+** ... -+*/ -+[[arm::locally_streaming]] int8x16_t -+test_q0 () -+{ -+ asm (""); -+ return (int8x16_t) {}; -+} -+ -+/* -+** test_q1: -+** ... -+** smstart sm -+** ... -+** stp q0, q1, \[sp, #?-32\]! -+** smstop sm -+** ldp q0, q1, \[sp\], #?32 -+** ... -+*/ -+[[arm::locally_streaming]] int8x16x2_t -+test_q1 () -+{ -+ asm (""); -+ return (int8x16x2_t) {}; -+} -+ -+/* -+** test_q2: -+** ... -+** smstart sm -+** ... -+** stp q0, q1, \[sp, #?-48\]! -+** str q2, \[sp, #?32\] -+** smstop sm -+** ldr q2, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?48 -+** ... -+*/ -+[[arm::locally_streaming]] int8x16x3_t -+test_q2 () -+{ -+ asm (""); -+ return (int8x16x3_t) {}; -+} -+ -+/* -+** test_q3: -+** ... -+** smstart sm -+** ... -+** stp q0, q1, \[sp, #?-64\]! -+** stp q2, q3, \[sp, #?32\] -+** smstop sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q0, q1, \[sp\], #?64 -+** ... -+*/ -+[[arm::locally_streaming]] int8x16x4_t -+test_q3 () -+{ -+ asm (""); -+ return (int8x16x4_t) {}; -+} -+ -+/* -+** test_z0: -+** ... -+** smstart sm -+** mov z0\.b, #0 -+** addvl sp, sp, #-1 -+** str z0, \[sp\] -+** smstop sm -+** ldr z0, \[sp\] -+** addvl sp, sp, #1 -+** ... -+*/ -+[[arm::locally_streaming]] svint8_t -+test_z0 () -+{ -+ asm (""); -+ return (svint8_t) {}; -+} -+ -+/* -+** test_z3: -+** ... -+** smstart sm -+** ... 
-+** addvl sp, sp, #-4 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** smstop sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** ... -+*/ -+[[arm::locally_streaming]] svint8x4_t -+test_z3 () -+{ -+ asm (""); -+ return (svint8x4_t) {}; -+} -+ -+/* -+** test_p0: -+** ... -+** smstart sm -+** pfalse p0\.b -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstop sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** ... -+*/ -+[[arm::locally_streaming]] svbool_t -+test_p0 () -+{ -+ asm (""); -+ return (svbool_t) {}; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c -new file mode 100644 -index 000000000..2bdea6ac6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_3.c -@@ -0,0 +1,273 @@ -+// { dg-options "-O -fomit-frame-pointer" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+#include -+#include -+ -+/* -+** test_d0: -+** ... -+** fmov x10, d0 -+** smstart sm -+** fmov d0, x10 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_d0 (double d0) -+{ -+ asm (""); -+} -+ -+/* -+** test_d7: -+** ... -+** fmov x10, d0 -+** fmov x11, d1 -+** fmov x12, d2 -+** fmov x13, d3 -+** fmov x14, d4 -+** fmov x15, d5 -+** fmov x16, d6 -+** fmov x17, d7 -+** smstart sm -+** fmov d0, x10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d3, x13 -+** fmov d4, x14 -+** fmov d5, x15 -+** fmov d6, x16 -+** fmov d7, x17 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_d7 (double d0, double d1, double d2, double d3, -+ double d4, double d5, double d6, double d7) -+{ -+ asm (""); -+} -+ -+/* -+** test_d0_vec: -+** ... -+** ( -+** fmov x10, d0 -+** | -+** umov x10, v0.d\[0\] -+** ) -+** smstart sm -+** fmov d0, x10 -+** smstop sm -+** ... 
-+*/ -+[[arm::locally_streaming]] void -+test_d0_vec (int8x8_t d0) -+{ -+ asm (""); -+} -+ -+/* -+** test_d7_vec: -+** ... -+** ( -+** fmov x10, d0 -+** fmov x11, d1 -+** fmov x12, d2 -+** fmov x13, d3 -+** fmov x14, d4 -+** fmov x15, d5 -+** fmov x16, d6 -+** fmov x17, d7 -+** | -+** umov x10, v0.d\[0\] -+** umov x11, v1.d\[0\] -+** umov x12, v2.d\[0\] -+** umov x13, v3.d\[0\] -+** umov x14, v4.d\[0\] -+** umov x15, v5.d\[0\] -+** umov x16, v6.d\[0\] -+** umov x17, v7.d\[0\] -+** ) -+** smstart sm -+** fmov d0, x10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d3, x13 -+** fmov d4, x14 -+** fmov d5, x15 -+** fmov d6, x16 -+** fmov d7, x17 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_d7_vec (int8x8_t d0, int8x8_t d1, int8x8_t d2, int8x8_t d3, -+ int8x8_t d4, int8x8_t d5, int8x8_t d6, int8x8_t d7) -+{ -+ asm (""); -+} -+ -+/* -+** test_q0: -+** ... -+** str q0, \[sp, #?-16\]! -+** smstart sm -+** ldr q0, \[sp\], #?16 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_q0 (int8x16_t q0) -+{ -+ asm (""); -+} -+ -+/* -+** test_q7: -+** ... -+** stp q0, q1, \[sp, #?-128\]! -+** stp q2, q3, \[sp, #?32\] -+** stp q4, q5, \[sp, #?64\] -+** stp q6, q7, \[sp, #?96\] -+** smstart sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q4, q5, \[sp, #?64\] -+** ldp q6, q7, \[sp, #?96\] -+** ldp q0, q1, \[sp\], #?128 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_q7 (int8x16x4_t q0, int8x16x4_t q4) -+{ -+ asm (""); -+} -+ -+/* -+** test_z0: -+** ... -+** addvl sp, sp, #-1 -+** str z0, \[sp\] -+** smstart sm -+** ldr z0, \[sp\] -+** addvl sp, sp, #1 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_z0 (svint8_t z0) -+{ -+ asm (""); -+} -+ -+/* -+** test_z7: -+** ... 
-+** addvl sp, sp, #-8 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** str z4, \[sp, #4, mul vl\] -+** str z5, \[sp, #5, mul vl\] -+** str z6, \[sp, #6, mul vl\] -+** str z7, \[sp, #7, mul vl\] -+** smstart sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** ldr z4, \[sp, #4, mul vl\] -+** ldr z5, \[sp, #5, mul vl\] -+** ldr z6, \[sp, #6, mul vl\] -+** ldr z7, \[sp, #7, mul vl\] -+** addvl sp, sp, #8 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_z7 (svint8x4_t z0, svint8x4_t z4) -+{ -+ asm (""); -+} -+ -+/* -+** test_p0: -+** ... -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** smstart sm -+** ldr p0, \[sp\] -+** addvl sp, sp, #1 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_p0 (svbool_t p0) -+{ -+ asm (""); -+} -+ -+/* -+** test_p3: -+** ... -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** str p1, \[sp, #1, mul vl\] -+** str p2, \[sp, #2, mul vl\] -+** str p3, \[sp, #3, mul vl\] -+** smstart sm -+** ldr p0, \[sp\] -+** ldr p1, \[sp, #1, mul vl\] -+** ldr p2, \[sp, #2, mul vl\] -+** ldr p3, \[sp, #3, mul vl\] -+** addvl sp, sp, #1 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm (""); -+} -+ -+/* -+** test_mixed: -+** ... -+** addvl sp, sp, #-3 -+** str p0, \[sp\] -+** str p1, \[sp, #1, mul vl\] -+** str p2, \[sp, #2, mul vl\] -+** str p3, \[sp, #3, mul vl\] -+** str z3, \[sp, #1, mul vl\] -+** str z7, \[sp, #2, mul vl\] -+** stp q2, q6, \[sp, #?-32\]! 
-+** fmov w10, s0 -+** fmov x11, d1 -+** fmov w12, s4 -+** fmov x13, d5 -+** smstart sm -+** fmov s0, w10 -+** fmov d1, x11 -+** fmov s4, w12 -+** fmov d5, x13 -+** ldp q2, q6, \[sp\], #?32 -+** ldr p0, \[sp\] -+** ldr p1, \[sp, #1, mul vl\] -+** ldr p2, \[sp, #2, mul vl\] -+** ldr p3, \[sp, #3, mul vl\] -+** ldr z3, \[sp, #1, mul vl\] -+** ldr z7, \[sp, #2, mul vl\] -+** addvl sp, sp, #3 -+** smstop sm -+** ... -+*/ -+[[arm::locally_streaming]] void -+test_mixed (float s0, double d1, float32x4_t q2, svfloat32_t z3, -+ float s4, double d5, float64x2_t q6, svfloat64_t z7, -+ svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm (""); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c -new file mode 100644 -index 000000000..42adeb152 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_4.c -@@ -0,0 +1,145 @@ -+// { dg-options "-O -fomit-frame-pointer" } -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+#include -+ -+/* -+** test_d0: -+** ... -+** smstart sm -+** ... -+** fmov x10, d0 -+** smstop sm -+** fmov d0, x10 -+** ... -+** smstart sm -+** ... -+** smstop sm -+** ... -+*/ -+void consume_d0 (double d0); -+ -+__arm_locally_streaming void -+test_d0 () -+{ -+ asm (""); -+ consume_d0 (1.0); -+ asm (""); -+} -+ -+/* -+** test_d7: -+** ... -+** fmov x10, d0 -+** fmov x11, d1 -+** fmov x12, d2 -+** fmov x13, d3 -+** fmov x14, d4 -+** fmov x15, d5 -+** fmov x16, d6 -+** fmov x17, d7 -+** smstop sm -+** fmov d0, x10 -+** fmov d1, x11 -+** fmov d2, x12 -+** fmov d3, x13 -+** fmov d4, x14 -+** fmov d5, x15 -+** fmov d6, x16 -+** fmov d7, x17 -+** ... -+*/ -+void consume_d7 (double d0, double d1, double d2, double d3, -+ double d4, double d5, double d6, double d7); -+__arm_locally_streaming void -+test_d7 () -+{ -+ asm (""); -+ consume_d7 (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); -+ asm (""); -+} -+ -+/* -+** test_q7: -+** ... 
-+** stp q0, q1, \[sp, #?-128\]! -+** stp q2, q3, \[sp, #?32\] -+** stp q4, q5, \[sp, #?64\] -+** stp q6, q7, \[sp, #?96\] -+** smstop sm -+** ldp q2, q3, \[sp, #?32\] -+** ldp q4, q5, \[sp, #?64\] -+** ldp q6, q7, \[sp, #?96\] -+** ldp q0, q1, \[sp\], #?128 -+** ... -+*/ -+void consume_q7 (int8x16x4_t q0, int8x16x4_t q4); -+ -+__arm_locally_streaming void -+test_q7 (int8x16x4_t *ptr) -+{ -+ asm (""); -+ consume_q7 (ptr[0], ptr[1]); -+ asm (""); -+} -+ -+/* -+** test_z7: -+** ... -+** addvl sp, sp, #-8 -+** str z0, \[sp\] -+** str z1, \[sp, #1, mul vl\] -+** str z2, \[sp, #2, mul vl\] -+** str z3, \[sp, #3, mul vl\] -+** str z4, \[sp, #4, mul vl\] -+** str z5, \[sp, #5, mul vl\] -+** str z6, \[sp, #6, mul vl\] -+** str z7, \[sp, #7, mul vl\] -+** smstop sm -+** ldr z0, \[sp\] -+** ldr z1, \[sp, #1, mul vl\] -+** ldr z2, \[sp, #2, mul vl\] -+** ldr z3, \[sp, #3, mul vl\] -+** ldr z4, \[sp, #4, mul vl\] -+** ldr z5, \[sp, #5, mul vl\] -+** ldr z6, \[sp, #6, mul vl\] -+** ldr z7, \[sp, #7, mul vl\] -+** addvl sp, sp, #8 -+** ... -+*/ -+void consume_z7 (svint8x4_t z0, svint8x4_t z4); -+ -+__arm_locally_streaming void -+test_z7 (svint8x4_t *ptr1, svint8x4_t *ptr2) -+{ -+ asm (""); -+ consume_z7 (*ptr1, *ptr2); -+ asm (""); -+} -+ -+/* -+** test_p3: -+** ... -+** addvl sp, sp, #-1 -+** str p0, \[sp\] -+** str p1, \[sp, #1, mul vl\] -+** str p2, \[sp, #2, mul vl\] -+** str p3, \[sp, #3, mul vl\] -+** smstop sm -+** ldr p0, \[sp\] -+** ldr p1, \[sp, #1, mul vl\] -+** ldr p2, \[sp, #2, mul vl\] -+** ldr p3, \[sp, #3, mul vl\] -+** addvl sp, sp, #1 -+** ... 
-+*/ -+void consume_p3 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3); -+ -+__arm_locally_streaming void -+test_p3 (svbool_t *ptr1, svbool_t *ptr2, svbool_t *ptr3, svbool_t *ptr4) -+{ -+ asm (""); -+ consume_p3 (*ptr1, *ptr2, *ptr3, *ptr4); -+ asm (""); -+} --- -2.33.0 - diff --git a/0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch b/0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch deleted file mode 100644 index f120a98..0000000 --- a/0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch +++ /dev/null @@ -1,708 +0,0 @@ -From ef9c800309fa326ca56dd9d9affd7d5498624bb8 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:29 +0000 -Subject: [PATCH 114/157] [Backport][SME] aarch64: Handle PSTATE.SM across - abnormal edges - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=275706fc59b5fdcc26d46d9f19951fc86b40c515 - -PSTATE.SM is always off on entry to an exception handler, and on entry -to a nonlocal goto receiver. Those entry points need to switch -PSTATE.SM back to the appropriate state for the current function. -In the case of streaming-compatible functions, they need to restore -the mode that the caller was originally using. - -The requirement on nonlocal goto receivers means that nonlocal -jumps need to ensure that PSTATE.SM is zero. - -gcc/ - * config/aarch64/aarch64.cc: Include except.h - (aarch64_sme_mode_switch_regs::add_call_preserved_reg): New function. - (aarch64_sme_mode_switch_regs::add_call_preserved_regs): Likewise. - (aarch64_need_old_pstate_sm): Return true if the function has - a nonlocal-goto or exception receiver. - (aarch64_switch_pstate_sm_for_landing_pad): New function. - (aarch64_switch_pstate_sm_for_jump): Likewise. - (pass_switch_pstate_sm::gate): Enable the pass for all - streaming and streaming-compatible functions. - (pass_switch_pstate_sm::execute): Handle non-local gotos and their - receivers. Handle exception handler entry points. 
- -gcc/testsuite/ - * g++.target/aarch64/sme/exceptions_2.C: New test. - * gcc.target/aarch64/sme/nonlocal_goto_1.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_2.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_3.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_4.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_5.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_6.c: Likewise. - * gcc.target/aarch64/sme/nonlocal_goto_7.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 141 ++++++++++++++++- - .../g++.target/aarch64/sme/exceptions_2.C | 148 ++++++++++++++++++ - .../gcc.target/aarch64/sme/nonlocal_goto_1.c | 58 +++++++ - .../gcc.target/aarch64/sme/nonlocal_goto_2.c | 44 ++++++ - .../gcc.target/aarch64/sme/nonlocal_goto_3.c | 46 ++++++ - .../gcc.target/aarch64/sme/nonlocal_goto_4.c | 25 +++ - .../gcc.target/aarch64/sme/nonlocal_goto_5.c | 26 +++ - .../gcc.target/aarch64/sme/nonlocal_goto_6.c | 31 ++++ - .../gcc.target/aarch64/sme/nonlocal_goto_7.c | 25 +++ - 9 files changed, 537 insertions(+), 7 deletions(-) - create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 4cb43c2e2..effb567c2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -82,6 +82,7 @@ - #include "tree-dfa.h" - #include "asan.h" - #include "aarch64-feature-deps.h" -+#include "except.h" - #include "tree-pass.h" - #include 
"cfgbuild.h" - -@@ -7295,6 +7296,8 @@ public: - void add_reg (machine_mode, unsigned int); - void add_call_args (rtx_call_insn *); - void add_call_result (rtx_call_insn *); -+ void add_call_preserved_reg (unsigned int); -+ void add_call_preserved_regs (bitmap); - - void emit_prologue (); - void emit_epilogue (); -@@ -7427,6 +7430,46 @@ aarch64_sme_mode_switch_regs::add_call_result (rtx_call_insn *call_insn) - add_reg (GET_MODE (dest), REGNO (dest)); - } - -+/* REGNO is a register that is call-preserved under the current function's ABI. -+ Record that it must be preserved around the mode switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::add_call_preserved_reg (unsigned int regno) -+{ -+ if (FP_REGNUM_P (regno)) -+ switch (crtl->abi->id ()) -+ { -+ case ARM_PCS_SVE: -+ add_reg (VNx16QImode, regno); -+ break; -+ case ARM_PCS_SIMD: -+ add_reg (V16QImode, regno); -+ break; -+ case ARM_PCS_AAPCS64: -+ add_reg (DImode, regno); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ else if (PR_REGNUM_P (regno)) -+ add_reg (VNx16BImode, regno); -+} -+ -+/* The hard registers in REGS are call-preserved under the current function's -+ ABI. Record that they must be preserved around the mode switch. */ -+ -+void -+aarch64_sme_mode_switch_regs::add_call_preserved_regs (bitmap regs) -+{ -+ bitmap_iterator bi; -+ unsigned int regno; -+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, regno, bi) -+ if (HARD_REGISTER_NUM_P (regno)) -+ add_call_preserved_reg (regno); -+ else -+ break; -+} -+ - /* Emit code to save registers before the mode switch. */ - - void -@@ -9825,6 +9868,23 @@ aarch64_need_old_pstate_sm () - if (aarch64_cfun_enables_pstate_sm ()) - return true; - -+ /* Non-local goto receivers are entered with PSTATE.SM equal to 0, -+ but the function needs to return with PSTATE.SM unchanged. */ -+ if (nonlocal_goto_handler_labels) -+ return true; -+ -+ /* Likewise for exception handlers. 
*/ -+ eh_landing_pad lp; -+ for (unsigned int i = 1; vec_safe_iterate (cfun->eh->lp_array, i, &lp); ++i) -+ if (lp && lp->post_landing_pad) -+ return true; -+ -+ /* Non-local gotos need to set PSTATE.SM to zero. It's possible to call -+ streaming-compatible functions without SME being available, so PSTATE.SM -+ should only be changed if it is currently set to one. */ -+ if (crtl->has_nonlocal_goto) -+ return true; -+ - if (cfun->machine->call_switches_pstate_sm) - for (auto insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (auto *call = dyn_cast (insn)) -@@ -30209,6 +30269,59 @@ aarch64_md_asm_adjust (vec &outputs, vec &inputs, - return seq; - } - -+/* BB is the target of an exception or nonlocal goto edge, which means -+ that PSTATE.SM is known to be 0 on entry. Put it into the state that -+ the current function requires. */ -+ -+static bool -+aarch64_switch_pstate_sm_for_landing_pad (basic_block bb) -+{ -+ if (TARGET_NON_STREAMING) -+ return false; -+ -+ start_sequence (); -+ rtx_insn *guard_label = nullptr; -+ if (TARGET_STREAMING_COMPATIBLE) -+ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, -+ AARCH64_FL_SM_OFF); -+ aarch64_sme_mode_switch_regs args_switch; -+ args_switch.add_call_preserved_regs (df_get_live_in (bb)); -+ args_switch.emit_prologue (); -+ aarch64_switch_pstate_sm (AARCH64_FL_SM_OFF, AARCH64_FL_SM_ON); -+ args_switch.emit_epilogue (); -+ if (guard_label) -+ emit_label (guard_label); -+ auto seq = get_insns (); -+ end_sequence (); -+ -+ emit_insn_after (seq, bb_note (bb)); -+ return true; -+} -+ -+/* JUMP is a nonlocal goto. Its target requires PSTATE.SM to be 0 on entry, -+ so arrange to make it so. 
*/ -+ -+static bool -+aarch64_switch_pstate_sm_for_jump (rtx_insn *jump) -+{ -+ if (TARGET_NON_STREAMING) -+ return false; -+ -+ start_sequence (); -+ rtx_insn *guard_label = nullptr; -+ if (TARGET_STREAMING_COMPATIBLE) -+ guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, -+ AARCH64_FL_SM_OFF); -+ aarch64_switch_pstate_sm (AARCH64_FL_SM_ON, AARCH64_FL_SM_OFF); -+ if (guard_label) -+ emit_label (guard_label); -+ auto seq = get_insns (); -+ end_sequence (); -+ -+ emit_insn_before (seq, jump); -+ return true; -+} -+ - /* If CALL involves a change in PSTATE.SM, emit the instructions needed - to switch to the new mode and the instructions needed to restore the - original mode. Return true if something changed. */ -@@ -30292,9 +30405,10 @@ public: - }; - - bool --pass_switch_pstate_sm::gate (function *) -+pass_switch_pstate_sm::gate (function *fn) - { -- return cfun->machine->call_switches_pstate_sm; -+ return (aarch64_fndecl_pstate_sm (fn->decl) != AARCH64_FL_SM_OFF -+ || cfun->machine->call_switches_pstate_sm); - } - - /* Emit any instructions needed to switch PSTATE.SM. 
*/ -@@ -30307,11 +30421,24 @@ pass_switch_pstate_sm::execute (function *fn) - bitmap_clear (blocks); - FOR_EACH_BB_FN (bb, fn) - { -- rtx_insn *insn; -- FOR_BB_INSNS (bb, insn) -- if (auto *call = dyn_cast (insn)) -- if (aarch64_switch_pstate_sm_for_call (call)) -- bitmap_set_bit (blocks, bb->index); -+ if (has_abnormal_call_or_eh_pred_edge_p (bb) -+ && aarch64_switch_pstate_sm_for_landing_pad (bb)) -+ bitmap_set_bit (blocks, bb->index); -+ -+ if (cfun->machine->call_switches_pstate_sm) -+ { -+ rtx_insn *insn; -+ FOR_BB_INSNS (bb, insn) -+ if (auto *call = dyn_cast (insn)) -+ if (aarch64_switch_pstate_sm_for_call (call)) -+ bitmap_set_bit (blocks, bb->index); -+ } -+ -+ auto end = BB_END (bb); -+ if (JUMP_P (end) -+ && find_reg_note (end, REG_NON_LOCAL_GOTO, NULL_RTX) -+ && aarch64_switch_pstate_sm_for_jump (end)) -+ bitmap_set_bit (blocks, bb->index); - } - find_many_sub_basic_blocks (blocks); - clear_aux_for_blocks (); -diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C -new file mode 100644 -index 000000000..f791b6ecc ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_2.C -@@ -0,0 +1,148 @@ -+// { dg-options "-O -fno-optimize-sibling-calls" } -+// { dg-final { check-function-bodies "**" "" } } -+ -+void n_callee(); -+void s_callee() __arm_streaming; -+void sc_callee() __arm_streaming_compatible; -+ -+void n_callee_ne() noexcept; -+void s_callee_ne() noexcept __arm_streaming; -+void sc_callee_ne() noexcept __arm_streaming_compatible; -+ -+void n_caller1() -+{ -+ try -+ { -+ n_callee(); -+ sc_callee(); -+ } -+ catch (...) -+ { -+ n_callee_ne(); -+ sc_callee_ne(); -+ } -+} -+// { dg-final { scan-assembler {_Z9n_caller1v:(?:(?!smstart|smstop).)*\tret} } } -+ -+/* -+** _Z9n_caller2v: -+** ... -+** cntd (x[0-9]+) -+** str \1, [^\n]+ -+** ... -+** bl __cxa_begin_catch -+** smstart sm -+** bl _Z11s_callee_nev -+** smstop sm -+** bl __cxa_end_catch -+** ... 
-+*/ -+void n_caller2() -+{ -+ try -+ { -+ n_callee(); -+ sc_callee(); -+ } -+ catch (...) -+ { -+ s_callee_ne(); -+ } -+} -+ -+/* -+** _Z9s_caller1v: -+** ... -+** bl __cxa_end_catch -+** smstart sm -+** ... -+*/ -+int s_caller1() __arm_streaming -+{ -+ try -+ { -+ s_callee(); -+ return 1; -+ } -+ catch (...) -+ { -+ return 2; -+ } -+} -+ -+/* -+** _Z9s_caller2v: -+** ... -+** bl __cxa_begin_catch -+** smstart sm -+** bl _Z11s_callee_nev -+** smstop sm -+** bl __cxa_end_catch -+** smstart sm -+** ... -+*/ -+int s_caller2() __arm_streaming -+{ -+ try -+ { -+ n_callee(); -+ return 1; -+ } -+ catch (...) -+ { -+ s_callee_ne(); -+ return 2; -+ } -+} -+ -+/* -+** _Z10sc_caller1v: -+** ... -+** cntd (x[0-9]+) -+** str \1, [^\n]+ -+** mrs (x[0-9]+), svcr -+** str \2, ([^\n]+) -+** ... -+** bl __cxa_end_catch -+** ldr (x[0-9]+), \3 -+** tbz \4, 0, [^\n]+ -+** smstart sm -+** ... -+*/ -+int sc_caller1() __arm_streaming_compatible -+{ -+ try -+ { -+ sc_callee(); -+ return 1; -+ } -+ catch (...) -+ { -+ return 2; -+ } -+} -+ -+/* -+** _Z10ls_caller1v: -+** ... -+** cntd (x[0-9]+) -+** str \1, [^\n]+ -+** ... -+** bl __cxa_begin_catch -+** smstart sm -+** bl _Z12sc_callee_nev -+** smstop sm -+** bl __cxa_end_catch -+** ... -+*/ -+__arm_locally_streaming void ls_caller1() -+{ -+ try -+ { -+ sc_callee(); -+ } -+ catch (...) -+ { -+ sc_callee_ne(); -+ } -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c -new file mode 100644 -index 000000000..4e3869fcc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_1.c -@@ -0,0 +1,58 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)()); -+ -+/* -+** foo: -+** ... -+** mrs x16, svcr -+** ... -+** str x16, (.*) -+** ... 
-+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstop sm -+** bl __clear_cache -+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstart sm -+** add x0, .* -+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstop sm -+** bl run -+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstart sm -+** mov w0, 1 -+** ... -+** ret -+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstart sm -+** mov w0, 0 -+** ... -+*/ -+int -+foo (int *ptr) __arm_streaming_compatible -+{ -+ __label__ failure; -+ -+ void bar () { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler {\tstp\tx19, x20,} } } -+// { dg-final { scan-assembler {\tstp\tx21, x22,} } } -+// { dg-final { scan-assembler {\tstp\tx23, x24,} } } -+// { dg-final { scan-assembler {\tstp\tx25, x26,} } } -+// { dg-final { scan-assembler {\tstp\tx27, x28,} } } -+// { dg-final { scan-assembler {\tstp\td8, d9,} } } -+// { dg-final { scan-assembler {\tstp\td10, d11,} } } -+// { dg-final { scan-assembler {\tstp\td12, d13,} } } -+// { dg-final { scan-assembler {\tstp\td14, d15,} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c -new file mode 100644 -index 000000000..2a2db72c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_2.c -@@ -0,0 +1,44 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)()); -+ -+/* -+** foo: -+** ... -+** smstop sm -+** bl __clear_cache -+** smstart sm -+** add x0, .* -+** smstop sm -+** bl run -+** smstart sm -+** mov w0, 1 -+** ... -+** ret -+** smstart sm -+** mov w0, 0 -+** ... 
-+*/ -+int -+foo (int *ptr) __arm_streaming -+{ -+ __label__ failure; -+ -+ void bar () { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler {\tstp\tx19, x20,} } } -+// { dg-final { scan-assembler {\tstp\tx21, x22,} } } -+// { dg-final { scan-assembler {\tstp\tx23, x24,} } } -+// { dg-final { scan-assembler {\tstp\tx25, x26,} } } -+// { dg-final { scan-assembler {\tstp\tx27, x28,} } } -+// { dg-final { scan-assembler {\tstp\td8, d9,} } } -+// { dg-final { scan-assembler {\tstp\td10, d11,} } } -+// { dg-final { scan-assembler {\tstp\td12, d13,} } } -+// { dg-final { scan-assembler {\tstp\td14, d15,} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c -new file mode 100644 -index 000000000..022b04052 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_3.c -@@ -0,0 +1,46 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)()); -+ -+/* -+** foo: -+** ... -+** smstart sm -+** ... -+** smstop sm -+** bl __clear_cache -+** smstart sm -+** add x0, .* -+** smstop sm -+** bl run -+** smstart sm -+** mov w0, 1 -+** ... -+** smstart sm -+** mov w0, 0 -+** smstop sm -+** ... 
-+*/ -+__arm_locally_streaming int -+foo (int *ptr) -+{ -+ __label__ failure; -+ -+ void bar () { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler {\tstp\tx19, x20,} } } -+// { dg-final { scan-assembler {\tstp\tx21, x22,} } } -+// { dg-final { scan-assembler {\tstp\tx23, x24,} } } -+// { dg-final { scan-assembler {\tstp\tx25, x26,} } } -+// { dg-final { scan-assembler {\tstp\tx27, x28,} } } -+// { dg-final { scan-assembler {\tstp\td8, d9,} } } -+// { dg-final { scan-assembler {\tstp\td10, d11,} } } -+// { dg-final { scan-assembler {\tstp\td12, d13,} } } -+// { dg-final { scan-assembler {\tstp\td14, d15,} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c -new file mode 100644 -index 000000000..044607628 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_4.c -@@ -0,0 +1,25 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)()); -+ -+/* -+** bar.0: -+** ... -+** smstart sm -+** ... -+** smstop sm -+** br x[0-9]+ -+*/ -+int -+foo (int *ptr) -+{ -+ __label__ failure; -+ -+ __arm_locally_streaming void bar () { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c -new file mode 100644 -index 000000000..4246aec8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_5.c -@@ -0,0 +1,26 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)() __arm_streaming); -+ -+/* -+** bar.0: -+** ... 
-+** smstop sm -+** br x[0-9]+ -+*/ -+int -+foo (int *ptr) -+{ -+ __label__ failure; -+ -+ void bar () __arm_streaming { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler-not {smstart\t} } } -+// { dg-final { scan-assembler-not {mrs\t} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c -new file mode 100644 -index 000000000..151e2f22d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_6.c -@@ -0,0 +1,31 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void run(void (*)() __arm_streaming_compatible); -+ -+/* -+** bar.0: -+** ... -+** mrs x16, svcr -+** ... -+** str x16, (.*) -+** ... -+** ldr x16, \1 -+** tbz x16, 0, .* -+** smstop sm -+** br x[0-9]+ -+*/ -+int -+foo (int *ptr) -+{ -+ __label__ failure; -+ -+ void bar () __arm_streaming_compatible { *ptr += 1; goto failure; } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler-not {smstart\t} } } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c -new file mode 100644 -index 000000000..9cc3ad5d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/nonlocal_goto_7.c -@@ -0,0 +1,25 @@ -+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ -+ -+void run(void (*)() __arm_inout("za")); -+void callee () __arm_inout("za"); -+ -+int -+foo (int *ptr) -+{ -+ __label__ failure; -+ -+ void bar () __arm_inout("za") -+ { -+ callee (); -+ *ptr += 1; -+ goto failure; -+ } -+ run (bar); -+ return 1; -+ -+failure: -+ return 0; -+} -+ -+// { dg-final { scan-assembler-not {\tsmstart\t} } } -+// { dg-final { scan-assembler-not {\tsmstop\t} } } --- -2.33.0 - diff --git a/0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch 
b/0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch deleted file mode 100644 index db7c5b3..0000000 --- a/0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch +++ /dev/null @@ -1,913 +0,0 @@ -From c4578108ab766178fe7ebd51421c1ac9f317b675 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:30 +0000 -Subject: [PATCH 115/157] [Backport][SME] aarch64: Enforce inlining - restrictions for SME - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e9aa05df6c643610a3821af52eda642a525a886 - -A function that has local ZA state cannot be inlined into its caller, -since we only support managing ZA switches at function scope. - -A function whose body directly clobbers ZA state cannot be inlined into -a function with ZA state. - -A function whose body requires a particular PSTATE.SM setting can only -be inlined into a function body that guarantees that PSTATE.SM setting. -The callee's function type doesn't matter here: one locally-streaming -function can be inlined into another. - -gcc/ - * config/aarch64/aarch64.cc: Include symbol-summary.h, ipa-prop.h, - and ipa-fnsummary.h - (aarch64_function_attribute_inlinable_p): New function. - (AARCH64_IPA_SM_FIXED, AARCH64_IPA_CLOBBERS_ZA): New constants. - (aarch64_need_ipa_fn_target_info): New function. - (aarch64_update_ipa_fn_target_info): Likewise. - (aarch64_can_inline_p): Restrict the previous ISA flag checks - to non-modal features. Prevent callees that require a particular - PSTATE.SM state from being inlined into callers that can't guarantee - that state. Also prevent callees that have ZA state from being - inlined into callers that don't. Finally, prevent callees that - clobber ZA from being inlined into callers that have ZA state. - (TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P): Define. - (TARGET_NEED_IPA_FN_TARGET_INFO): Likewise. - (TARGET_UPDATE_IPA_FN_TARGET_INFO): Likewise. - -gcc/testsuite/ - * gcc.target/aarch64/sme/inlining_1.c: New test. 
- * gcc.target/aarch64/sme/inlining_2.c: Likewise. - * gcc.target/aarch64/sme/inlining_3.c: Likewise. - * gcc.target/aarch64/sme/inlining_4.c: Likewise. - * gcc.target/aarch64/sme/inlining_5.c: Likewise. - * gcc.target/aarch64/sme/inlining_6.c: Likewise. - * gcc.target/aarch64/sme/inlining_7.c: Likewise. - * gcc.target/aarch64/sme/inlining_8.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 132 +++++++++++++++++- - .../gcc.target/aarch64/sme/inlining_1.c | 47 +++++++ - .../gcc.target/aarch64/sme/inlining_10.c | 57 ++++++++ - .../gcc.target/aarch64/sme/inlining_11.c | 57 ++++++++ - .../gcc.target/aarch64/sme/inlining_12.c | 15 ++ - .../gcc.target/aarch64/sme/inlining_13.c | 15 ++ - .../gcc.target/aarch64/sme/inlining_14.c | 15 ++ - .../gcc.target/aarch64/sme/inlining_15.c | 27 ++++ - .../gcc.target/aarch64/sme/inlining_2.c | 47 +++++++ - .../gcc.target/aarch64/sme/inlining_3.c | 47 +++++++ - .../gcc.target/aarch64/sme/inlining_4.c | 47 +++++++ - .../gcc.target/aarch64/sme/inlining_5.c | 47 +++++++ - .../gcc.target/aarch64/sme/inlining_6.c | 31 ++++ - .../gcc.target/aarch64/sme/inlining_7.c | 31 ++++ - .../gcc.target/aarch64/sme/inlining_8.c | 31 ++++ - .../gcc.target/aarch64/sme/inlining_9.c | 55 ++++++++ - 16 files changed, 696 insertions(+), 5 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c - 
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index effb567c2..eab94d5c2 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -85,6 +85,9 @@ - #include "except.h" - #include "tree-pass.h" - #include "cfgbuild.h" -+#include "symbol-summary.h" -+#include "ipa-prop.h" -+#include "ipa-fnsummary.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -21351,6 +21354,17 @@ aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int) - return ret; - } - -+/* Implement TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P. Use an opt-out -+ rather than an opt-in list. */ -+ -+static bool -+aarch64_function_attribute_inlinable_p (const_tree fndecl) -+{ -+ /* A function that has local ZA state cannot be inlined into its caller, -+ since we only support managing ZA switches at function scope. */ -+ return !aarch64_fndecl_has_new_state (fndecl, "za"); -+} -+ - /* Helper for aarch64_can_inline_p. In the case where CALLER and CALLEE are - tri-bool options (yes, no, don't care) and the default value is - DEF, determine whether to reject inlining. */ -@@ -21372,6 +21386,60 @@ aarch64_tribools_ok_for_inlining_p (int caller, int callee, - return (callee == caller || callee == def); - } - -+/* Bit allocations for ipa_fn_summary::target_info. */ -+ -+/* Set if the function contains a stmt that relies on the function's -+ choice of PSTATE.SM setting (0 for non-streaming, 1 for streaming). -+ Not meaningful for streaming-compatible functions. */ -+constexpr auto AARCH64_IPA_SM_FIXED = 1U << 0; -+ -+/* Set if the function clobbers ZA. 
Not meaningful for functions that -+ have ZA state. */ -+constexpr auto AARCH64_IPA_CLOBBERS_ZA = 1U << 1; -+ -+/* Implement TARGET_NEED_IPA_FN_TARGET_INFO. */ -+ -+static bool -+aarch64_need_ipa_fn_target_info (const_tree, unsigned int &) -+{ -+ /* We could in principle skip this for streaming-compatible functions -+ that have ZA state, but that's a rare combination. */ -+ return true; -+} -+ -+/* Implement TARGET_UPDATE_IPA_FN_TARGET_INFO. */ -+ -+static bool -+aarch64_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) -+{ -+ if (auto *ga = dyn_cast (stmt)) -+ { -+ /* We don't know what the asm does, so conservatively assume that -+ it requires the function's current SM mode. */ -+ info |= AARCH64_IPA_SM_FIXED; -+ for (unsigned int i = 0; i < gimple_asm_nclobbers (ga); ++i) -+ { -+ tree op = gimple_asm_clobber_op (ga, i); -+ const char *clobber = TREE_STRING_POINTER (TREE_VALUE (op)); -+ if (strcmp (clobber, "za") == 0) -+ info |= AARCH64_IPA_CLOBBERS_ZA; -+ } -+ } -+ if (auto *call = dyn_cast (stmt)) -+ { -+ if (gimple_call_builtin_p (call, BUILT_IN_MD)) -+ { -+ /* The attributes on AArch64 builtins are supposed to be accurate. -+ If the function isn't marked streaming-compatible then it -+ needs whichever SM mode it selects. */ -+ tree decl = gimple_call_fndecl (call); -+ if (aarch64_fndecl_pstate_sm (decl) != 0) -+ info |= AARCH64_IPA_SM_FIXED; -+ } -+ } -+ return true; -+} -+ - /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid - to inline CALLEE into CALLER based on target-specific info. - Make sure that the caller and callee have compatible architectural -@@ -21394,12 +21462,56 @@ aarch64_can_inline_p (tree caller, tree callee) - : target_option_default_node); - - /* Callee's ISA flags should be a subset of the caller's. 
*/ -- if ((caller_opts->x_aarch64_asm_isa_flags -- & callee_opts->x_aarch64_asm_isa_flags) -- != callee_opts->x_aarch64_asm_isa_flags) -+ auto caller_asm_isa = (caller_opts->x_aarch64_asm_isa_flags -+ & ~AARCH64_FL_ISA_MODES); -+ auto callee_asm_isa = (callee_opts->x_aarch64_asm_isa_flags -+ & ~AARCH64_FL_ISA_MODES); -+ if (callee_asm_isa & ~caller_asm_isa) - return false; -- if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags) -- != callee_opts->x_aarch64_isa_flags) -+ -+ auto caller_isa = (caller_opts->x_aarch64_isa_flags -+ & ~AARCH64_FL_ISA_MODES); -+ auto callee_isa = (callee_opts->x_aarch64_isa_flags -+ & ~AARCH64_FL_ISA_MODES); -+ if (callee_isa & ~caller_isa) -+ return false; -+ -+ /* Return true if the callee might have target_info property PROPERTY. -+ The answer must be true unless we have positive proof to the contrary. */ -+ auto callee_has_property = [&](unsigned int property) -+ { -+ if (ipa_fn_summaries) -+ if (auto *summary = ipa_fn_summaries->get (cgraph_node::get (callee))) -+ if (!(summary->target_info & property)) -+ return false; -+ return true; -+ }; -+ -+ /* Streaming-compatible code can be inlined into functions with any -+ PSTATE.SM mode. Otherwise the caller and callee must agree on -+ PSTATE.SM mode, unless we can prove that the callee is naturally -+ streaming-compatible. */ -+ auto caller_sm = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE); -+ auto callee_sm = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_SM_STATE); -+ if (callee_sm -+ && caller_sm != callee_sm -+ && callee_has_property (AARCH64_IPA_SM_FIXED)) -+ return false; -+ -+ /* aarch64_function_attribute_inlinable_p prevents new-ZA functions -+ from being inlined into others. We also need to prevent inlining -+ of shared-ZA functions into functions without ZA state, since this -+ is an error condition. -+ -+ The only other problematic case for ZA is inlining a function that -+ directly clobbers ZA into a function that has ZA state. 
*/ -+ auto caller_za = (caller_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON); -+ auto callee_za = (callee_opts->x_aarch64_isa_flags & AARCH64_FL_ZA_ON); -+ if (!caller_za && callee_za) -+ return false; -+ if (caller_za -+ && !callee_za -+ && callee_has_property (AARCH64_IPA_CLOBBERS_ZA)) - return false; - - /* Allow non-strict aligned functions inlining into strict -@@ -30732,6 +30844,16 @@ aarch64_get_v16qi_mode () - #undef TARGET_CAN_ELIMINATE - #define TARGET_CAN_ELIMINATE aarch64_can_eliminate - -+#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P -+#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P \ -+ aarch64_function_attribute_inlinable_p -+ -+#undef TARGET_NEED_IPA_FN_TARGET_INFO -+#define TARGET_NEED_IPA_FN_TARGET_INFO aarch64_need_ipa_fn_target_info -+ -+#undef TARGET_UPDATE_IPA_FN_TARGET_INFO -+#define TARGET_UPDATE_IPA_FN_TARGET_INFO aarch64_update_ipa_fn_target_info -+ - #undef TARGET_CAN_INLINE_P - #define TARGET_CAN_INLINE_P aarch64_can_inline_p - -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c -new file mode 100644 -index 000000000..24dc2b341 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_1.c -@@ -0,0 +1,47 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+sc_callee () [[arm::streaming_compatible]] {} -+ -+inline void __attribute__((always_inline)) -+s_callee () [[arm::streaming]] {} -+ -+inline void __attribute__((always_inline)) -+n_callee () {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_callee () [[arm::streaming_compatible]] {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_callee () {} -+ -+inline void __attribute__((always_inline)) -+sc_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+s_asm_callee () [[arm::streaming]] { asm (""); } // { dg-error "inlining failed" } -+ -+inline void 
__attribute__((always_inline)) -+n_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+void -+sc_caller () [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+ s_callee (); -+ n_callee (); -+ sc_ls_callee (); -+ n_ls_callee (); -+ -+ sc_asm_callee (); -+ s_asm_callee (); -+ n_asm_callee (); -+ sc_ls_asm_callee (); -+ n_ls_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c -new file mode 100644 -index 000000000..adfd45a87 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_10.c -@@ -0,0 +1,57 @@ -+/* { dg-options "" } */ -+ -+#include -+#include -+ -+uint8x16_t *neon; -+svint64_t *sve; -+int64_t *ptr; -+ -+// Gets expanded to addition early, so no error. An error would be -+// more correct though. 
-+inline void __attribute__((always_inline)) -+call_vadd () -+{ -+ neon[4] = vaddq_u8 (neon[5], neon[6]); -+} -+ -+inline void __attribute__((always_inline)) -+call_vbsl () // { dg-error "inlining failed" } -+{ -+ neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]); -+} -+ -+inline void __attribute__((always_inline)) -+call_svadd () -+{ -+ *sve = svadd_x (svptrue_b8 (), *sve, 1); -+} -+ -+inline void __attribute__((always_inline)) -+call_svld1_gather () // { dg-error "inlining failed" } -+{ -+ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); -+} -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za")]] -+{ -+ svzero_za (); -+} -+ -+inline void __attribute__((always_inline)) -+call_svst1_za () [[arm::streaming, arm::inout("za")]] // { dg-error "inlining failed" } -+{ -+ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); -+} -+ -+void -+sc_caller () [[arm::inout("za"), arm::streaming_compatible]] -+{ -+ call_vadd (); -+ call_vbsl (); -+ call_svadd (); -+ call_svld1_gather (); -+ call_svzero (); -+ call_svst1_za (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c -new file mode 100644 -index 000000000..d05a92c1c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_11.c -@@ -0,0 +1,57 @@ -+/* { dg-options "" } */ -+ -+#include -+#include -+ -+uint8x16_t *neon; -+svint64_t *sve; -+int64_t *ptr; -+ -+// Gets expanded to addition early, so no error. An error would be -+// more correct though. 
-+inline void __attribute__((always_inline)) -+call_vadd () -+{ -+ neon[4] = vaddq_u8 (neon[5], neon[6]); -+} -+ -+inline void __attribute__((always_inline)) -+call_vbsl () // { dg-error "inlining failed" } -+{ -+ neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]); -+} -+ -+inline void __attribute__((always_inline)) -+call_svadd () -+{ -+ *sve = svadd_x (svptrue_b8 (), *sve, 1); -+} -+ -+inline void __attribute__((always_inline)) -+call_svld1_gather () // { dg-error "inlining failed" } -+{ -+ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); -+} -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za")]] -+{ -+ svzero_za (); -+} -+ -+inline void __attribute__((always_inline)) -+call_svst1_za () [[arm::streaming, arm::inout("za")]] -+{ -+ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); -+} -+ -+void -+sc_caller () [[arm::inout("za"), arm::streaming]] -+{ -+ call_vadd (); -+ call_vbsl (); -+ call_svadd (); -+ call_svld1_gather (); -+ call_svzero (); -+ call_svst1_za (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c -new file mode 100644 -index 000000000..366f8b24a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_12.c -@@ -0,0 +1,15 @@ -+/* { dg-options "" } */ -+ -+#include -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" } -+{ -+ svzero_za (); -+} -+ -+void -+n_caller () -+{ -+ call_svzero (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c -new file mode 100644 -index 000000000..bdbd7408c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_13.c -@@ -0,0 +1,15 @@ -+/* { dg-options "" } */ -+ -+#include -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" } -+{ -+ svzero_za (); 
-+} -+ -+void -+s_caller () -+{ -+ call_svzero (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c -new file mode 100644 -index 000000000..0ce4384f6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_14.c -@@ -0,0 +1,15 @@ -+/* { dg-options "" } */ -+ -+#include -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za"), arm::streaming_compatible]] // { dg-error "inlining failed" } -+{ -+ svzero_za (); -+} -+ -+void -+sc_caller () -+{ -+ call_svzero (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c -new file mode 100644 -index 000000000..06fc5d7f5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_15.c -@@ -0,0 +1,27 @@ -+/* { dg-options "" } */ -+ -+#include -+ -+inline void -+call_svzero () [[arm::inout("za"), arm::streaming_compatible]] -+{ -+ svzero_za (); -+} -+ -+void -+n_caller () -+{ -+ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } -+} -+ -+void -+s_caller () -+{ -+ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } -+} -+ -+void -+sc_caller () -+{ -+ call_svzero (); // { dg-error "call to a function that shares 'za' state from a function that has no 'za' state" } -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c -new file mode 100644 -index 000000000..ea2a57049 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_2.c -@@ -0,0 +1,47 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+sc_callee () [[arm::streaming_compatible]] {} -+ -+inline void __attribute__((always_inline)) -+s_callee () [[arm::streaming]] {} -+ -+inline void __attribute__((always_inline)) -+n_callee () {} -+ -+[[arm::locally_streaming]] 
inline void __attribute__((always_inline)) -+sc_ls_callee () [[arm::streaming_compatible]] {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_callee () {} -+ -+inline void __attribute__((always_inline)) -+sc_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+s_asm_callee () [[arm::streaming]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+n_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_asm_callee () { asm (""); } -+ -+void -+s_caller () [[arm::streaming]] -+{ -+ sc_callee (); -+ s_callee (); -+ n_callee (); -+ sc_ls_callee (); -+ n_ls_callee (); -+ -+ sc_asm_callee (); -+ s_asm_callee (); -+ n_asm_callee (); -+ sc_ls_asm_callee (); -+ n_ls_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c -new file mode 100644 -index 000000000..d7ffb3819 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_3.c -@@ -0,0 +1,47 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+sc_callee () [[arm::streaming_compatible]] {} -+ -+inline void __attribute__((always_inline)) -+s_callee () [[arm::streaming]] {} -+ -+inline void __attribute__((always_inline)) -+n_callee () {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_callee () [[arm::streaming_compatible]] {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_callee () {} -+ -+inline void __attribute__((always_inline)) -+sc_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+s_asm_callee () [[arm::streaming]] { asm (""); } // { dg-error "inlining 
failed" } -+ -+inline void __attribute__((always_inline)) -+n_asm_callee () { asm (""); } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+void -+n_caller () -+{ -+ sc_callee (); -+ s_callee (); -+ n_callee (); -+ sc_ls_callee (); -+ n_ls_callee (); -+ -+ sc_asm_callee (); -+ s_asm_callee (); -+ n_asm_callee (); -+ sc_ls_asm_callee (); -+ n_ls_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c -new file mode 100644 -index 000000000..789203725 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_4.c -@@ -0,0 +1,47 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+sc_callee () [[arm::streaming_compatible]] {} -+ -+inline void __attribute__((always_inline)) -+s_callee () [[arm::streaming]] {} -+ -+inline void __attribute__((always_inline)) -+n_callee () {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_callee () [[arm::streaming_compatible]] {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_callee () {} -+ -+inline void __attribute__((always_inline)) -+sc_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+s_asm_callee () [[arm::streaming]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+n_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_asm_callee () { asm (""); } -+ -+[[arm::locally_streaming]] void 
-+sc_ls_caller () [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+ s_callee (); -+ n_callee (); -+ sc_ls_callee (); -+ n_ls_callee (); -+ -+ sc_asm_callee (); -+ s_asm_callee (); -+ n_asm_callee (); -+ sc_ls_asm_callee (); -+ n_ls_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c -new file mode 100644 -index 000000000..d19cdc450 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_5.c -@@ -0,0 +1,47 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+sc_callee () [[arm::streaming_compatible]] {} -+ -+inline void __attribute__((always_inline)) -+s_callee () [[arm::streaming]] {} -+ -+inline void __attribute__((always_inline)) -+n_callee () {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_callee () [[arm::streaming_compatible]] {} -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_callee () {} -+ -+inline void __attribute__((always_inline)) -+sc_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+s_asm_callee () [[arm::streaming]] { asm (""); } -+ -+inline void __attribute__((always_inline)) -+n_asm_callee () { asm (""); } // { dg-error "inlining failed" } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+sc_ls_asm_callee () [[arm::streaming_compatible]] { asm (""); } -+ -+[[arm::locally_streaming]] inline void __attribute__((always_inline)) -+n_ls_asm_callee () { asm (""); } -+ -+[[arm::locally_streaming]] void -+n_ls_caller () -+{ -+ sc_callee (); -+ s_callee (); -+ n_callee (); -+ sc_ls_callee (); -+ n_ls_callee (); -+ -+ sc_asm_callee (); -+ s_asm_callee (); -+ n_asm_callee (); -+ sc_ls_asm_callee (); -+ n_ls_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c -new file mode 100644 -index 
000000000..a5eb399f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_6.c -@@ -0,0 +1,31 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+shared_callee () [[arm::inout("za")]] {} -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_callee () {} // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_callee () {} -+ -+inline void __attribute__((always_inline)) -+shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); } -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } -+ -+void -+shared_caller () [[arm::inout("za")]] -+{ -+ shared_callee (); -+ new_callee (); -+ normal_callee (); -+ -+ shared_asm_callee (); -+ new_asm_callee (); -+ normal_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c -new file mode 100644 -index 000000000..0f046283f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_7.c -@@ -0,0 +1,31 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+shared_callee () [[arm::inout("za")]] {} -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_callee () {} // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_callee () {} -+ -+inline void __attribute__((always_inline)) -+shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); } -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" 
} -+ -+[[arm::new("za")]] void -+new_caller () -+{ -+ shared_callee (); -+ new_callee (); -+ normal_callee (); -+ -+ shared_asm_callee (); -+ new_asm_callee (); -+ normal_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c -new file mode 100644 -index 000000000..fd8a3a61e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_8.c -@@ -0,0 +1,31 @@ -+/* { dg-options "" } */ -+ -+inline void __attribute__((always_inline)) -+shared_callee () [[arm::inout("za")]] {} // { dg-error "inlining failed" } -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_callee () {} // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_callee () {} -+ -+inline void __attribute__((always_inline)) -+shared_asm_callee () [[arm::inout("za")]] { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } -+ -+[[arm::new("za")]] inline void __attribute__((always_inline)) -+new_asm_callee () { asm volatile ("" ::: "za"); } // { dg-error "inlining failed" } -+ -+inline void __attribute__((always_inline)) -+normal_asm_callee () { asm volatile ("" ::: "za"); } -+ -+void -+normal_caller () -+{ -+ shared_callee (); -+ new_callee (); -+ normal_callee (); -+ -+ shared_asm_callee (); -+ new_asm_callee (); -+ normal_asm_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c -new file mode 100644 -index 000000000..91520e378 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/inlining_9.c -@@ -0,0 +1,55 @@ -+/* { dg-options "" } */ -+ -+#include -+#include -+ -+uint8x16_t *neon; -+svint64_t *sve; -+int64_t *ptr; -+ -+inline void __attribute__((always_inline)) -+call_vadd () -+{ -+ neon[4] = vaddq_u8 (neon[5], neon[6]); -+} -+ -+inline void __attribute__((always_inline)) -+call_vbsl () -+{ -+ neon[0] = vbslq_u8 (neon[1], neon[2], neon[3]); -+} -+ -+inline void 
__attribute__((always_inline)) -+call_svadd () -+{ -+ *sve = svadd_x (svptrue_b8 (), *sve, 1); -+} -+ -+inline void __attribute__((always_inline)) -+call_svld1_gather () -+{ -+ *sve = svld1_gather_offset (svptrue_b8 (), ptr, *sve); -+} -+ -+inline void __attribute__((always_inline)) -+call_svzero () [[arm::inout("za")]] -+{ -+ svzero_za (); -+} -+ -+inline void __attribute__((always_inline)) -+call_svst1_za () [[arm::streaming, arm::inout("za")]] // { dg-error "inlining failed" } -+{ -+ svst1_ver_za64 (0, 0, svptrue_b8 (), ptr); -+} -+ -+void -+n_caller () [[arm::inout("za")]] -+{ -+ call_vadd (); -+ call_vbsl (); -+ call_svadd (); -+ call_svld1_gather (); -+ call_svzero (); -+ call_svst1_za (); -+} --- -2.33.0 - diff --git a/0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch b/0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch deleted file mode 100644 index aa57f9c..0000000 --- a/0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch +++ /dev/null @@ -1,424 +0,0 @@ -From 08b6cbe756ede25b16b8e9ff9ee32f76c4f8430f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 10:11:30 +0000 -Subject: [PATCH 116/157] [Backport][SME] aarch64: Update sibcall handling for - SME - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0e7fee57c00ae17611651e0b057dc03b6e276b82 - -We only support tail calls between functions with the same PSTATE.ZA -setting ("private-ZA" to "private-ZA" and "shared-ZA" to "shared-ZA"). - -Only a normal non-streaming function can tail-call another non-streaming -function, and only a streaming function can tail-call another streaming -function. Any function can tail-call a streaming-compatible function. - -gcc/ - * config/aarch64/aarch64.cc (aarch64_function_ok_for_sibcall): - Enforce PSTATE.SM and PSTATE.ZA restrictions. - (aarch64_expand_epilogue): Save and restore the arguments - to a sibcall around any change to PSTATE.SM. - -gcc/testsuite/ - * gcc.target/aarch64/sme/sibcall_1.c: New test. 
- * gcc.target/aarch64/sme/sibcall_2.c: Likewise. - * gcc.target/aarch64/sme/sibcall_3.c: Likewise. - * gcc.target/aarch64/sme/sibcall_4.c: Likewise. - * gcc.target/aarch64/sme/sibcall_5.c: Likewise. - * gcc.target/aarch64/sme/sibcall_6.c: Likewise. - * gcc.target/aarch64/sme/sibcall_7.c: Likewise. - * gcc.target/aarch64/sme/sibcall_8.c: Likewise. ---- - gcc/config/aarch64/aarch64.cc | 9 +++- - .../gcc.target/aarch64/sme/sibcall_1.c | 45 +++++++++++++++++++ - .../gcc.target/aarch64/sme/sibcall_2.c | 45 +++++++++++++++++++ - .../gcc.target/aarch64/sme/sibcall_3.c | 45 +++++++++++++++++++ - .../gcc.target/aarch64/sme/sibcall_4.c | 45 +++++++++++++++++++ - .../gcc.target/aarch64/sme/sibcall_5.c | 45 +++++++++++++++++++ - .../gcc.target/aarch64/sme/sibcall_6.c | 26 +++++++++++ - .../gcc.target/aarch64/sme/sibcall_7.c | 26 +++++++++++ - .../gcc.target/aarch64/sme/sibcall_8.c | 19 ++++++++ - 9 files changed, 304 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c - create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index eab94d5c2..b8e540b6e 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -8660,6 +8660,11 @@ aarch64_function_ok_for_sibcall (tree, tree exp) - if (crtl->abi->id () != expr_callee_abi (exp).id ()) - return false; - -+ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); -+ if (aarch64_fntype_pstate_sm (fntype) & ~aarch64_cfun_incoming_pstate_sm ()) -+ return false; 
-+ if (aarch64_fntype_pstate_za (fntype) != aarch64_cfun_incoming_pstate_za ()) -+ return false; - return true; - } - -@@ -11923,7 +11928,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall) - guard_label = aarch64_guard_switch_pstate_sm (IP0_REGNUM, - aarch64_isa_flags); - aarch64_sme_mode_switch_regs return_switch; -- if (crtl->return_rtx && REG_P (crtl->return_rtx)) -+ if (sibcall) -+ return_switch.add_call_args (sibcall); -+ else if (crtl->return_rtx && REG_P (crtl->return_rtx)) - return_switch.add_reg (GET_MODE (crtl->return_rtx), - REGNO (crtl->return_rtx)); - return_switch.emit_prologue (); -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c -new file mode 100644 -index 000000000..c7530de5c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_1.c -@@ -0,0 +1,45 @@ -+/* { dg-options "-O2" } */ -+ -+void sc_callee () [[arm::streaming_compatible]]; -+void s_callee () [[arm::streaming]]; -+void n_callee (); -+ -+[[arm::locally_streaming]] __attribute__((noipa)) void -+sc_ls_callee () [[arm::streaming_compatible]] {} -+[[arm::locally_streaming]] __attribute__((noipa)) void -+n_ls_callee () {} -+ -+void -+sc_to_sc () [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ -+ -+void -+sc_to_s () [[arm::streaming_compatible]] -+{ -+ s_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ -+ -+void -+sc_to_n () [[arm::streaming_compatible]] -+{ -+ n_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ -+ -+void -+sc_to_sc_ls () [[arm::streaming_compatible]] -+{ -+ sc_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ -+ -+void -+sc_to_n_ls () [[arm::streaming_compatible]] -+{ -+ n_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c -new 
file mode 100644 -index 000000000..8d1c8a9f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_2.c -@@ -0,0 +1,45 @@ -+/* { dg-options "-O2" } */ -+ -+void sc_callee () [[arm::streaming_compatible]]; -+void s_callee () [[arm::streaming]]; -+void n_callee (); -+ -+[[arm::locally_streaming]] __attribute__((noipa)) void -+sc_ls_callee () [[arm::streaming_compatible]] {} -+[[arm::locally_streaming]] __attribute__((noipa)) void -+n_ls_callee () {} -+ -+void -+s_to_sc () [[arm::streaming]] -+{ -+ sc_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ -+ -+void -+s_to_s () [[arm::streaming]] -+{ -+ s_callee (); -+} -+/* { dg-final { scan-assembler {\tb\ts_callee} } } */ -+ -+void -+s_to_n () [[arm::streaming]] -+{ -+ n_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ -+ -+void -+s_to_sc_ls () [[arm::streaming]] -+{ -+ sc_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ -+ -+void -+s_to_n_ls () [[arm::streaming]] -+{ -+ n_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c -new file mode 100644 -index 000000000..2ae937fc5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_3.c -@@ -0,0 +1,45 @@ -+/* { dg-options "-O2" } */ -+ -+void sc_callee () [[arm::streaming_compatible]]; -+void s_callee () [[arm::streaming]]; -+void n_callee (); -+ -+[[arm::locally_streaming]] __attribute__((noipa)) void -+sc_ls_callee () [[arm::streaming_compatible]] {} -+[[arm::locally_streaming]] __attribute__((noipa)) void -+n_ls_callee () {} -+ -+void -+n_to_sc () -+{ -+ sc_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ -+ -+void -+n_to_s () -+{ -+ s_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ -+ -+void -+n_to_n () -+{ -+ n_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tn_callee} } } */ -+ -+void 
-+n_to_sc_ls () -+{ -+ sc_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ -+ -+void -+n_to_n_ls () -+{ -+ n_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c -new file mode 100644 -index 000000000..6935a1bd7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_4.c -@@ -0,0 +1,45 @@ -+/* { dg-options "-O2" } */ -+ -+void sc_callee () [[arm::streaming_compatible]]; -+void s_callee () [[arm::streaming]]; -+void n_callee (); -+ -+[[arm::locally_streaming]] __attribute__((noipa)) void -+sc_ls_callee () [[arm::streaming_compatible]] {} -+[[arm::locally_streaming]] __attribute__((noipa)) void -+n_ls_callee () {} -+ -+[[arm::locally_streaming]] void -+sc_to_sc () [[arm::streaming_compatible]] -+{ -+ sc_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ -+ -+[[arm::locally_streaming]] void -+sc_to_s () [[arm::streaming_compatible]] -+{ -+ s_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ -+ -+[[arm::locally_streaming]] void -+sc_to_n () [[arm::streaming_compatible]] -+{ -+ n_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_callee} } } */ -+ -+[[arm::locally_streaming]] void -+sc_to_sc_ls () [[arm::streaming_compatible]] -+{ -+ sc_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ -+ -+[[arm::locally_streaming]] void -+sc_to_n_ls () [[arm::streaming_compatible]] -+{ -+ n_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tn_ls_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c -new file mode 100644 -index 000000000..7aaf58dfa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_5.c -@@ -0,0 +1,45 @@ -+/* { dg-options "-O2" } */ -+ -+void sc_callee () [[arm::streaming_compatible]]; -+void s_callee () [[arm::streaming]]; 
-+void n_callee (); -+ -+[[arm::locally_streaming]] __attribute__((noipa)) void -+sc_ls_callee () [[arm::streaming_compatible]] {} -+[[arm::locally_streaming]] __attribute__((noipa)) void -+n_ls_callee () {} -+ -+[[arm::locally_streaming]] void -+n_to_sc () -+{ -+ sc_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_callee} } } */ -+ -+[[arm::locally_streaming]] void -+n_to_s () -+{ -+ s_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\ts_callee} } } */ -+ -+[[arm::locally_streaming]] void -+n_to_n () -+{ -+ n_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tn_callee} } } */ -+ -+[[arm::locally_streaming]] void -+n_to_sc_ls () -+{ -+ sc_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tsc_ls_callee} } } */ -+ -+[[arm::locally_streaming]] void -+n_to_n_ls () -+{ -+ n_ls_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tn_ls_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c -new file mode 100644 -index 000000000..e568edb17 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_6.c -@@ -0,0 +1,26 @@ -+/* { dg-options "-O2" } */ -+ -+void shared_callee () [[arm::inout("za")]]; -+[[arm::new("za")]] __attribute__((noipa)) void new_callee () {} -+void normal_callee (); -+ -+void -+shared_to_shared () [[arm::inout("za")]] -+{ -+ shared_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tshared_callee} } } */ -+ -+void -+shared_to_new () [[arm::inout("za")]] -+{ -+ new_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tnew_callee} } } */ -+ -+void -+shared_to_normal () [[arm::inout("za")]] -+{ -+ normal_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tnormal_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c -new file mode 100644 -index 000000000..a5f576d20 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_7.c -@@ -0,0 +1,26 @@ -+/* { dg-options "-O2" 
} */ -+ -+void shared_callee () [[arm::inout("za")]]; -+[[arm::new("za")]] __attribute__((noipa)) void new_callee () {} -+void normal_callee (); -+ -+[[arm::new("za")]] void -+new_to_shared () -+{ -+ shared_callee (); -+} -+/* { dg-final { scan-assembler {\tbl\tshared_callee} } } */ -+ -+[[arm::new("za")]] void -+new_to_new () -+{ -+ new_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ -+ -+[[arm::new("za")]] void -+new_to_normal () -+{ -+ normal_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c -new file mode 100644 -index 000000000..33370f7a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sme/sibcall_8.c -@@ -0,0 +1,19 @@ -+/* { dg-options "-O2" } */ -+ -+void shared_callee () [[arm::inout("za")]]; -+[[arm::new("za")]] __attribute__((noipa)) void new_callee () {} -+void normal_callee (); -+ -+void -+normal_to_new () -+{ -+ new_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tnew_callee} } } */ -+ -+void -+normal_to_normal () -+{ -+ normal_callee (); -+} -+/* { dg-final { scan-assembler {\tb\tnormal_callee} } } */ --- -2.33.0 - diff --git a/0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch b/0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch deleted file mode 100644 index 6e8467e..0000000 --- a/0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch +++ /dev/null @@ -1,117 +0,0 @@ -From e0da78a258a34c26488b7ae623f9ae8727c2b264 Mon Sep 17 00:00:00 2001 -From: Szabolcs Nagy -Date: Mon, 14 Nov 2022 17:14:18 +0000 -Subject: [PATCH 117/157] [Backport][SME] libgcc: aarch64: Configure check for - .variant_pcs support - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3ebb591c65b4fbe0cddd23ebc0ca2d9f7aef2cec - -Ideally SME support routines in libgcc are marked as variant PCS symbols -so check if as supports the directive. 
- -libgcc/ChangeLog: - - * config.in: Undef HAVE_AS_VARIANT_PCS. - * configure: Regenerate. - * configure.ac: Check for .variant_pcs. ---- - libgcc/config.in | 3 +++ - libgcc/configure | 39 +++++++++++++++++++++++++++++++++++++++ - libgcc/configure.ac | 17 +++++++++++++++++ - 3 files changed, 59 insertions(+) - -diff --git a/libgcc/config.in b/libgcc/config.in -index f93c64a00..5dd96cdf6 100644 ---- a/libgcc/config.in -+++ b/libgcc/config.in -@@ -13,6 +13,9 @@ - /* Define to 1 if the assembler supports LSE. */ - #undef HAVE_AS_LSE - -+/* Define to 1 if the assembler supports .variant_pcs. */ -+#undef HAVE_AS_VARIANT_PCS -+ - /* Define to 1 if the target assembler supports thread-local storage. */ - #undef HAVE_CC_TLS - -diff --git a/libgcc/configure b/libgcc/configure -index 1f9b2ac57..afe02b303 100755 ---- a/libgcc/configure -+++ b/libgcc/configure -@@ -5619,6 +5619,45 @@ $as_echo "#define HAVE_AS_LSE 1" >>confdefs.h - ;; - esac - -+ -+ -+case "${target}" in -+aarch64*-*-*) -+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if as supports .variant_pcs" >&5 -+$as_echo_n "checking if as supports .variant_pcs... " >&6; } -+if ${libgcc_cv_as_variant_pcs+:} false; then : -+ $as_echo_n "(cached) " >&6 -+else -+ -+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext -+/* end confdefs.h. */ -+ -+int -+main () -+{ -+asm (".variant_pcs foobar"); -+ ; -+ return 0; -+} -+_ACEOF -+if ac_fn_c_try_compile "$LINENO"; then : -+ libgcc_cv_as_variant_pcs=yes -+else -+ libgcc_cv_as_variant_pcs=no -+fi -+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -+ -+fi -+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_as_variant_pcs" >&5 -+$as_echo "$libgcc_cv_as_variant_pcs" >&6; } -+ if test x$libgcc_cv_as_variant_pcs = xyes; then -+ -+$as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h -+ -+ fi -+ ;; -+esac -+ - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 - $as_echo_n "checking for init priority support... 
" >&6; } - if ${libgcc_cv_init_priority+:} false; then : -diff --git a/libgcc/configure.ac b/libgcc/configure.ac -index 2fc9d5d7c..abc398c91 100644 ---- a/libgcc/configure.ac -+++ b/libgcc/configure.ac -@@ -648,6 +648,23 @@ changequote([,])dnl - esac]) - LIBGCC_CHECK_AS_LSE - -+dnl Check if as supports .variant_pcs. -+AC_DEFUN([LIBGCC_CHECK_AS_VARIANT_PCS], [ -+case "${target}" in -+aarch64*-*-*) -+ AC_CACHE_CHECK([if as supports .variant_pcs], libgcc_cv_as_variant_pcs, [ -+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, -+ [[asm (".variant_pcs foobar");]])], -+ [libgcc_cv_as_variant_pcs=yes], [libgcc_cv_as_variant_pcs=no]) -+ ]) -+ if test x$libgcc_cv_as_variant_pcs = xyes; then -+ AC_DEFINE(HAVE_AS_VARIANT_PCS, 1, -+ [Define to 1 if the assembler supports .variant_pcs.]) -+ fi -+ ;; -+esac]) -+LIBGCC_CHECK_AS_VARIANT_PCS -+ - dnl Check if as supports RTM instructions. - AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [ - AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, --- -2.33.0 - diff --git a/0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch b/0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch deleted file mode 100644 index 07c1417..0000000 --- a/0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 66d4035958e1dee2d16f9290004921674eb492b3 Mon Sep 17 00:00:00 2001 -From: Szabolcs Nagy -Date: Mon, 4 Dec 2023 10:52:52 +0000 -Subject: [PATCH 118/157] [Backport][SME] libgcc: aarch64: Configure check for - __getauxval - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 - -Add configure check for the __getauxval ABI symbol, which is always -available on aarch64 glibc, and may be available on other linux C -runtimes. 
For now only enabled on glibc, others have to override it - - target_configargs=libgcc_cv_have___getauxval=yes - -This is deliberately obscure as it should be auto detected, ideally -via a feature test macro in unistd.h (link time detection is not -possible since the libc may not be installed at libgcc build time), -but currently there is no such feature test mechanism. - -Without __getauxval, libgcc cannot do runtime CPU feature detection -and has to assume only the build time known features are available. - -libgcc/ChangeLog: - - * config.in: Undef HAVE___GETAUXVAL. - * configure: Regenerate. - * configure.ac: Check for __getauxval. ---- - libgcc/config.in | 3 +++ - libgcc/configure | 26 ++++++++++++++++++++++++++ - libgcc/configure.ac | 19 +++++++++++++++++++ - 3 files changed, 48 insertions(+) - -diff --git a/libgcc/config.in b/libgcc/config.in -index 5dd96cdf6..441d4d39b 100644 ---- a/libgcc/config.in -+++ b/libgcc/config.in -@@ -16,6 +16,9 @@ - /* Define to 1 if the assembler supports .variant_pcs. */ - #undef HAVE_AS_VARIANT_PCS - -+/* Define to 1 if __getauxval is available. */ -+#undef HAVE___GETAUXVAL -+ - /* Define to 1 if the target assembler supports thread-local storage. */ - #undef HAVE_CC_TLS - -diff --git a/libgcc/configure b/libgcc/configure -index afe02b303..a874ef57e 100755 ---- a/libgcc/configure -+++ b/libgcc/configure -@@ -5658,6 +5658,32 @@ $as_echo "#define HAVE_AS_VARIANT_PCS 1" >>confdefs.h - ;; - esac - -+# Check __getauxval ABI symbol for CPU feature detection. -+case ${target} in -+aarch64*-linux-*) -+ # No link check because the libc may not be present. -+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for __getauxval" >&5 -+$as_echo_n "checking for __getauxval... 
" >&6; } -+if ${libgcc_cv_have___getauxval+:} false; then : -+ $as_echo_n "(cached) " >&6 -+else -+ case ${target} in -+ *-linux-gnu*) -+ libgcc_cv_have___getauxval=yes -+ ;; -+ *) -+ libgcc_cv_have___getauxval=no -+ esac -+fi -+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcc_cv_have___getauxval" >&5 -+$as_echo "$libgcc_cv_have___getauxval" >&6; } -+ if test x$libgcc_cv_have___getauxval = xyes; then -+ -+$as_echo "#define HAVE___GETAUXVAL 1" >>confdefs.h -+ -+ fi -+esac -+ - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for init priority support" >&5 - $as_echo_n "checking for init priority support... " >&6; } - if ${libgcc_cv_init_priority+:} false; then : -diff --git a/libgcc/configure.ac b/libgcc/configure.ac -index abc398c91..64b45ae14 100644 ---- a/libgcc/configure.ac -+++ b/libgcc/configure.ac -@@ -665,6 +665,25 @@ aarch64*-*-*) - esac]) - LIBGCC_CHECK_AS_VARIANT_PCS - -+# Check __getauxval ABI symbol for CPU feature detection. -+case ${target} in -+aarch64*-linux-*) -+ # No link check because the libc may not be present. -+ AC_CACHE_CHECK([for __getauxval], -+ [libgcc_cv_have___getauxval], -+ [case ${target} in -+ *-linux-gnu*) -+ libgcc_cv_have___getauxval=yes -+ ;; -+ *) -+ libgcc_cv_have___getauxval=no -+ esac]) -+ if test x$libgcc_cv_have___getauxval = xyes; then -+ AC_DEFINE(HAVE___GETAUXVAL, 1, -+ [Define to 1 if __getauxval is available.]) -+ fi -+esac -+ - dnl Check if as supports RTM instructions. 
- AC_CACHE_CHECK(for init priority support, libgcc_cv_init_priority, [ - AC_COMPILE_IFELSE([AC_LANG_PROGRAM(, --- -2.33.0 - diff --git a/0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch b/0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch deleted file mode 100644 index 30b85d9..0000000 --- a/0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch +++ /dev/null @@ -1,627 +0,0 @@ -From 1e111ac2d71c5469dc526559de009542acaeb16f Mon Sep 17 00:00:00 2001 -From: Szabolcs Nagy -Date: Tue, 15 Nov 2022 14:08:55 +0000 -Subject: [PATCH 119/157] [Backport][SME] libgcc: aarch64: Add SME runtime - support - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=328c17af772207cb03740809c05ba2c3abfb86be - -The call ABI for SME (Scalable Matrix Extension) requires a number of -helper routines which are added to libgcc so they are tied to the -compiler version instead of the libc version. See -https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines - -The routines are in shared libgcc and static libgcc eh, even though -they are not related to exception handling. This is to avoid linking -a copy of the routines into dynamic linked binaries, because TPIDR2_EL0 -block can be extended in the future which is better to handle in a -single place per process. - -The support routines have to decide if SME is accessible or not. Linux -tells userspace if SME is accessible via AT_HWCAP2, otherwise a new -__aarch64_sme_accessible symbol was introduced that a libc can define. -Due to libgcc and libc build order, the symbol availability cannot be -checked so for __aarch64_sme_accessible an unistd.h feature test macro -is used while such detection mechanism is not available for __getauxval -so we rely on configure checks based on the target triplet. - -Asm helper code is added to make writing the routines easier. - -libgcc/ChangeLog: - - * config/aarch64/t-aarch64: Add sources to the build. 
- * config/aarch64/__aarch64_have_sme.c: New file. - * config/aarch64/__arm_sme_state.S: New file. - * config/aarch64/__arm_tpidr2_restore.S: New file. - * config/aarch64/__arm_tpidr2_save.S: New file. - * config/aarch64/__arm_za_disable.S: New file. - * config/aarch64/aarch64-asm.h: New file. - * config/aarch64/libgcc-sme.ver: New file. ---- - libgcc/config/aarch64/__aarch64_have_sme.c | 75 ++++++++++++++ - libgcc/config/aarch64/__arm_sme_state.S | 55 ++++++++++ - libgcc/config/aarch64/__arm_tpidr2_restore.S | 89 ++++++++++++++++ - libgcc/config/aarch64/__arm_tpidr2_save.S | 101 +++++++++++++++++++ - libgcc/config/aarch64/__arm_za_disable.S | 65 ++++++++++++ - libgcc/config/aarch64/aarch64-asm.h | 98 ++++++++++++++++++ - libgcc/config/aarch64/libgcc-sme.ver | 24 +++++ - libgcc/config/aarch64/t-aarch64 | 10 ++ - 8 files changed, 517 insertions(+) - create mode 100644 libgcc/config/aarch64/__aarch64_have_sme.c - create mode 100644 libgcc/config/aarch64/__arm_sme_state.S - create mode 100644 libgcc/config/aarch64/__arm_tpidr2_restore.S - create mode 100644 libgcc/config/aarch64/__arm_tpidr2_save.S - create mode 100644 libgcc/config/aarch64/__arm_za_disable.S - create mode 100644 libgcc/config/aarch64/aarch64-asm.h - create mode 100644 libgcc/config/aarch64/libgcc-sme.ver - -diff --git a/libgcc/config/aarch64/__aarch64_have_sme.c b/libgcc/config/aarch64/__aarch64_have_sme.c -new file mode 100644 -index 000000000..5e6492462 ---- /dev/null -+++ b/libgcc/config/aarch64/__aarch64_have_sme.c -@@ -0,0 +1,75 @@ -+/* Initializer for SME support. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. 
-+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "auto-target.h" -+ -+#ifndef inhibit_libc -+/* For libc feature test macros. */ -+# include -+#endif -+ -+#if __ARM_FEATURE_SME -+/* Avoid runtime SME detection if libgcc is built with SME. */ -+# define HAVE_SME_CONST const -+# define HAVE_SME_VALUE 1 -+#elif HAVE___GETAUXVAL -+/* SME access detection on Linux. */ -+# define HAVE_SME_CONST -+# define HAVE_SME_VALUE 0 -+# define HAVE_SME_CTOR sme_accessible () -+ -+# define AT_HWCAP2 26 -+# define HWCAP2_SME (1 << 23) -+unsigned long int __getauxval (unsigned long int); -+ -+static _Bool -+sme_accessible (void) -+{ -+ unsigned long hwcap2 = __getauxval (AT_HWCAP2); -+ return (hwcap2 & HWCAP2_SME) != 0; -+} -+#elif __LIBC___AARCH64_SME_ACCESSIBLE -+/* Alternative SME access detection. */ -+# define HAVE_SME_CONST -+# define HAVE_SME_VALUE 0 -+# define HAVE_SME_CTOR __aarch64_sme_accessible () -+_Bool __aarch64_sme_accessible (void); -+#else -+# define HAVE_SME_CONST const -+# define HAVE_SME_VALUE 0 -+#endif -+ -+/* Define the symbol gating SME support in libgcc. */ -+HAVE_SME_CONST _Bool __aarch64_have_sme -+ __attribute__((visibility("hidden"), nocommon)) = HAVE_SME_VALUE; -+ -+#ifdef HAVE_SME_CTOR -+/* Use a higher priority to ensure it runs before user constructors -+ with priority 100. 
*/ -+static void __attribute__((constructor (90))) -+init_have_sme (void) -+{ -+ __aarch64_have_sme = HAVE_SME_CTOR; -+} -+#endif -diff --git a/libgcc/config/aarch64/__arm_sme_state.S b/libgcc/config/aarch64/__arm_sme_state.S -new file mode 100644 -index 000000000..c4e16cac0 ---- /dev/null -+++ b/libgcc/config/aarch64/__arm_sme_state.S -@@ -0,0 +1,55 @@ -+/* Support routine for SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "aarch64-asm.h" -+ -+/* Query SME state. Call ABI: -+ - Private ZA, streaming-compatible. -+ - x2-x15, x19-x29, sp and fp regs are call preserved. -+ - Takes no argument. -+ - Returns SME state in x0 and TPIDR2_EL0 in x1. */ -+ -+.hidden __aarch64_have_sme -+ -+variant_pcs (__arm_sme_state) -+ -+ENTRY (__arm_sme_state) -+ /* Check if SME is available. */ -+ adrp x1, __aarch64_have_sme -+ ldrb w1, [x1, :lo12:__aarch64_have_sme] -+ cbz w1, L(nosme) -+ -+ /* Expose the bottom 2 bits of svcr (SM, ZA) in x0 and set the -+ top 2 bits indicating that SME and TPIDR2_EL0 are available. 
*/ -+ .inst 0xd53b4240 /* mrs x0, svcr */ -+ .inst 0xd53bd0a1 /* mrs x1, tpidr2_el0 */ -+ and x0, x0, 3 -+ orr x0, x0, 0xc000000000000000 -+ ret -+ -+L(nosme): -+ mov x0, 0 -+ mov x1, 0 -+ ret -+END (__arm_sme_state) -diff --git a/libgcc/config/aarch64/__arm_tpidr2_restore.S b/libgcc/config/aarch64/__arm_tpidr2_restore.S -new file mode 100644 -index 000000000..4569d04a2 ---- /dev/null -+++ b/libgcc/config/aarch64/__arm_tpidr2_restore.S -@@ -0,0 +1,89 @@ -+/* Support routine for SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "aarch64-asm.h" -+ -+/* Used for lazy ZA restore. Call ABI: -+ - Shared ZA, streaming-compatible. -+ - x0 is a pointer to a TPIDR2 block. -+ - x0-x13, x19-x29, sp and fp regs are call preserved. -+ - Does not return a value. -+ - Can abort on failure (then registers are not preserved). */ -+ -+variant_pcs (__arm_tpidr2_restore) -+ -+ENTRY (__arm_tpidr2_restore) -+ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ -+ cbnz x14, L(fail) -+ -+ /* check reserved bytes. 
*/ -+ ldrh w15, [x0, 10] -+ ldr w16, [x0, 12] -+ orr w15, w15, w16 -+ cbnz w15, L(fail) -+ -+ ldr x16, [x0] -+ cbz x16, L(end) -+ ldrh w17, [x0, 8] -+ cbz w17, L(end) -+ -+ /* x0: blk, x14: 0, x15: 0, -+ x16: za_save_buffer, x17: num_za_save_slices. */ -+ -+L(restore_loop): -+ .inst 0xe1006200 /* ldr za[w15, 0], [x16] */ -+ .inst 0xe1006201 /* ldr za[w15, 1], [x16, 1, mul vl] */ -+ .inst 0xe1006202 /* ldr za[w15, 2], [x16, 2, mul vl] */ -+ .inst 0xe1006203 /* ldr za[w15, 3], [x16, 3, mul vl] */ -+ .inst 0xe1006204 /* ldr za[w15, 4], [x16, 4, mul vl] */ -+ .inst 0xe1006205 /* ldr za[w15, 5], [x16, 5, mul vl] */ -+ .inst 0xe1006206 /* ldr za[w15, 6], [x16, 6, mul vl] */ -+ .inst 0xe1006207 /* ldr za[w15, 7], [x16, 7, mul vl] */ -+ .inst 0xe1006208 /* ldr za[w15, 8], [x16, 8, mul vl] */ -+ .inst 0xe1006209 /* ldr za[w15, 9], [x16, 9, mul vl] */ -+ .inst 0xe100620a /* ldr za[w15, 10], [x16, 10, mul vl] */ -+ .inst 0xe100620b /* ldr za[w15, 11], [x16, 11, mul vl] */ -+ .inst 0xe100620c /* ldr za[w15, 12], [x16, 12, mul vl] */ -+ .inst 0xe100620d /* ldr za[w15, 13], [x16, 13, mul vl] */ -+ .inst 0xe100620e /* ldr za[w15, 14], [x16, 14, mul vl] */ -+ .inst 0xe100620f /* ldr za[w15, 15], [x16, 15, mul vl] */ -+ add w15, w15, 16 -+ .inst 0x04305a10 /* addsvl x16, x16, 16 */ -+ cmp w17, w15 -+ bhi L(restore_loop) -+L(end): -+ ret -+L(fail): -+ PACIASP -+ stp x29, x30, [sp, -32]! -+ .cfi_adjust_cfa_offset 32 -+ .cfi_rel_offset x29, 0 -+ .cfi_rel_offset x30, 8 -+ mov x29, sp -+ .inst 0x04e0e3f0 /* cntd x16 */ -+ str x16, [sp, 16] -+ .cfi_rel_offset 46, 16 -+ .inst 0xd503467f /* smstop */ -+ bl abort -+END (__arm_tpidr2_restore) -diff --git a/libgcc/config/aarch64/__arm_tpidr2_save.S b/libgcc/config/aarch64/__arm_tpidr2_save.S -new file mode 100644 -index 000000000..879cf7980 ---- /dev/null -+++ b/libgcc/config/aarch64/__arm_tpidr2_save.S -@@ -0,0 +1,101 @@ -+/* Support routine for SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. 
-+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "aarch64-asm.h" -+ -+/* Used for lazy ZA save. Call ABI: -+ - Private ZA, streaming-compatible. -+ - x0-x13, x19-x29, sp and fp regs are call preserved. -+ - Takes no argument. -+ - Does not return a value. -+ - Can abort on failure (then registers are not preserved). */ -+ -+.hidden __aarch64_have_sme -+ -+variant_pcs (__arm_tpidr2_save) -+ -+ENTRY (__arm_tpidr2_save) -+ /* Check if SME is available. */ -+ adrp x14, __aarch64_have_sme -+ ldrb w14, [x14, :lo12:__aarch64_have_sme] -+ cbz w14, L(end) -+ -+ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ -+ cbz x14, L(end) -+ -+ /* check reserved bytes. */ -+ ldrh w15, [x14, 10] -+ ldr w16, [x14, 12] -+ orr w15, w15, w16 -+ cbnz w15, L(fail) -+ -+ ldr x16, [x14] -+ cbz x16, L(end) -+ ldrh w17, [x14, 8] -+ cbz w17, L(end) -+ -+ /* x14: tpidr2, x15: 0, -+ x16: za_save_buffer, x17: num_za_save_slices. 
*/ -+ -+L(save_loop): -+ .inst 0xe1206200 /* str za[w15, 0], [x16] */ -+ .inst 0xe1206201 /* str za[w15, 1], [x16, 1, mul vl] */ -+ .inst 0xe1206202 /* str za[w15, 2], [x16, 2, mul vl] */ -+ .inst 0xe1206203 /* str za[w15, 3], [x16, 3, mul vl] */ -+ .inst 0xe1206204 /* str za[w15, 4], [x16, 4, mul vl] */ -+ .inst 0xe1206205 /* str za[w15, 5], [x16, 5, mul vl] */ -+ .inst 0xe1206206 /* str za[w15, 6], [x16, 6, mul vl] */ -+ .inst 0xe1206207 /* str za[w15, 7], [x16, 7, mul vl] */ -+ .inst 0xe1206208 /* str za[w15, 8], [x16, 8, mul vl] */ -+ .inst 0xe1206209 /* str za[w15, 9], [x16, 9, mul vl] */ -+ .inst 0xe120620a /* str za[w15, 10], [x16, 10, mul vl] */ -+ .inst 0xe120620b /* str za[w15, 11], [x16, 11, mul vl] */ -+ .inst 0xe120620c /* str za[w15, 12], [x16, 12, mul vl] */ -+ .inst 0xe120620d /* str za[w15, 13], [x16, 13, mul vl] */ -+ .inst 0xe120620e /* str za[w15, 14], [x16, 14, mul vl] */ -+ .inst 0xe120620f /* str za[w15, 15], [x16, 15, mul vl] */ -+ add w15, w15, 16 -+ .inst 0x04305a10 /* addsvl x16, x16, 16 */ -+ cmp w17, w15 -+ bhi L(save_loop) -+L(end): -+ ret -+L(fail): -+ PACIASP -+ stp x29, x30, [sp, -32]! -+ .cfi_adjust_cfa_offset 32 -+ .cfi_rel_offset x29, 0 -+ .cfi_rel_offset x30, 8 -+ mov x29, sp -+ .inst 0x04e0e3f0 /* cntd x16 */ -+ str x16, [sp, 16] -+ .cfi_rel_offset 46, 16 -+ .inst 0xd503467f /* smstop */ -+ bl abort -+END (__arm_tpidr2_save) -+ -+/* Hidden alias used by __arm_za_disable. */ -+.global __libgcc_arm_tpidr2_save -+.hidden __libgcc_arm_tpidr2_save -+.set __libgcc_arm_tpidr2_save, __arm_tpidr2_save -diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S -new file mode 100644 -index 000000000..cff5b9cec ---- /dev/null -+++ b/libgcc/config/aarch64/__arm_za_disable.S -@@ -0,0 +1,65 @@ -+/* Support routine for SME. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. 
-+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "aarch64-asm.h" -+ -+/* Disable ZA. Call ABI: -+ - Private ZA, streaming-compatible. -+ - x0-x13, x19-x29, sp and fp regs are call preserved. -+ - Takes no argument. -+ - Does not return a value. -+ - Can abort on failure (then registers are not preserved). */ -+ -+.hidden __aarch64_have_sme -+ -+.hidden __libgcc_arm_tpidr2_save -+ -+variant_pcs (__arm_za_disable) -+ -+ENTRY (__arm_za_disable) -+ /* Check if SME is available. */ -+ adrp x14, __aarch64_have_sme -+ ldrb w14, [x14, :lo12:__aarch64_have_sme] -+ cbz w14, L(end) -+ -+ .inst 0xd53bd0ae /* mrs x14, tpidr2_el0 */ -+ cbz x14, L(end) -+ -+ PACIASP -+ stp x29, x30, [sp, -16]! 
-+ .cfi_adjust_cfa_offset 16 -+ .cfi_rel_offset x29, 0 -+ .cfi_rel_offset x30, 8 -+ mov x29, sp -+ bl __libgcc_arm_tpidr2_save -+ .inst 0xd51bd0bf /* msr tpidr2_el0, xzr */ -+ .inst 0xd503447f /* smstop za */ -+ ldp x29, x30, [sp], 16 -+ .cfi_adjust_cfa_offset -16 -+ .cfi_restore x29 -+ .cfi_restore x30 -+ AUTIASP -+L(end): -+ ret -+END (__arm_za_disable) -diff --git a/libgcc/config/aarch64/aarch64-asm.h b/libgcc/config/aarch64/aarch64-asm.h -new file mode 100644 -index 000000000..8969b06b0 ---- /dev/null -+++ b/libgcc/config/aarch64/aarch64-asm.h -@@ -0,0 +1,98 @@ -+/* AArch64 asm definitions. -+ Copyright (C) 2023 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "auto-target.h" -+ -+#define L(label) .L ## label -+ -+/* Marking variant PCS symbol references is important for PLT calls -+ otherwise it is for documenting the PCS in the symbol table. */ -+#ifdef HAVE_AS_VARIANT_PCS -+# define variant_pcs(name) .variant_pcs name -+#else -+# define variant_pcs(name) -+#endif -+ -+/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. 
*/ -+#define FEATURE_1_AND 0xc0000000 -+#define FEATURE_1_BTI 1 -+#define FEATURE_1_PAC 2 -+ -+/* Supported features based on the code generation options. */ -+#if defined(__ARM_FEATURE_BTI_DEFAULT) -+# define BTI_FLAG FEATURE_1_BTI -+# define BTI_C hint 34 -+#else -+# define BTI_FLAG 0 -+# define BTI_C -+#endif -+ -+#if __ARM_FEATURE_PAC_DEFAULT & 3 -+# define PAC_FLAG FEATURE_1_PAC -+# define PACIASP hint 25; .cfi_window_save -+# define AUTIASP hint 29; .cfi_window_save -+#else -+# define PAC_FLAG 0 -+# define PACIASP -+# define AUTIASP -+#endif -+ -+/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ -+#define GNU_PROPERTY(type, value) \ -+ .section .note.gnu.property, "a"; \ -+ .p2align 3; \ -+ .word 4; \ -+ .word 16; \ -+ .word 5; \ -+ .asciz "GNU"; \ -+ .word type; \ -+ .word 4; \ -+ .word value; \ -+ .word 0; \ -+ .previous -+ -+#if defined(__linux__) || defined(__FreeBSD__) -+/* Do not require executable stack. */ -+.section .note.GNU-stack, "", %progbits -+.previous -+ -+/* Add GNU property note if built with branch protection. */ -+# if (BTI_FLAG|PAC_FLAG) != 0 -+GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG) -+# endif -+#endif -+ -+#define ENTRY_ALIGN(name, align) \ -+ .global name; \ -+ .type name,%function; \ -+ .balign align; \ -+ name: \ -+ .cfi_startproc; \ -+ BTI_C -+ -+#define ENTRY(name) ENTRY_ALIGN(name, 16) -+ -+#define END(name) \ -+ .cfi_endproc; \ -+ .size name, .-name -diff --git a/libgcc/config/aarch64/libgcc-sme.ver b/libgcc/config/aarch64/libgcc-sme.ver -new file mode 100644 -index 000000000..da889c6c0 ---- /dev/null -+++ b/libgcc/config/aarch64/libgcc-sme.ver -@@ -0,0 +1,24 @@ -+# Copyright (C) 2023 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. 
-+# -+# GCC is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . -+ -+GCC_14.0 { -+ __arm_sme_state -+ __arm_tpidr2_restore -+ __arm_tpidr2_save -+ __arm_za_disable -+} -diff --git a/libgcc/config/aarch64/t-aarch64 b/libgcc/config/aarch64/t-aarch64 -index 8ca803bd3..5a8feb184 100644 ---- a/libgcc/config/aarch64/t-aarch64 -+++ b/libgcc/config/aarch64/t-aarch64 -@@ -19,3 +19,13 @@ - # . - - LIB2ADD += $(srcdir)/config/aarch64/sync-cache.c -+ -+# Add sme runtime to shared libgcc -+LIB2ADDEH += \ -+ $(srcdir)/config/aarch64/__aarch64_have_sme.c \ -+ $(srcdir)/config/aarch64/__arm_sme_state.S \ -+ $(srcdir)/config/aarch64/__arm_tpidr2_restore.S \ -+ $(srcdir)/config/aarch64/__arm_tpidr2_save.S \ -+ $(srcdir)/config/aarch64/__arm_za_disable.S -+ -+SHLIB_MAPFILES += $(srcdir)/config/aarch64/libgcc-sme.ver --- -2.33.0 - diff --git a/0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch b/0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch deleted file mode 100644 index 91c5d4b..0000000 --- a/0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 310c8b5aaedad1430146fed9d8992201278164a6 Mon Sep 17 00:00:00 2001 -From: Szabolcs Nagy -Date: Fri, 29 Sep 2023 13:55:51 +0100 -Subject: [PATCH 120/157] [Backport][SME] libgcc: aarch64: Add SME unwinder - support - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=91d68665b8b7a5dffd0bbf8cd1f74c3c41d4c2d8 - -To support the ZA lazy save scheme, the PCS requires the unwinder to -reset the SME state to PSTATE.SM=0, PSTATE.ZA=0, TPIDR2_EL0=0 on entry -to an exception handler. We use the __arm_za_disable SME runtime call -unconditionally to achieve this. 
-https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#exceptions - -The hidden alias is used to avoid a PLT and avoid inconsistent VPCS -marking (we don't rely on special PCS at the call site). In case of -static linking the SME runtime init code is linked in code that raises -exceptions. - -libgcc/ChangeLog: - - * config/aarch64/__arm_za_disable.S: Add hidden alias. - * config/aarch64/aarch64-unwind.h: Reset the SME state before - EH return via the _Unwind_Frames_Extra hook. ---- - libgcc/config/aarch64/__arm_za_disable.S | 5 +++++ - libgcc/config/aarch64/aarch64-unwind.h | 16 ++++++++++++++++ - 2 files changed, 21 insertions(+) - -diff --git a/libgcc/config/aarch64/__arm_za_disable.S b/libgcc/config/aarch64/__arm_za_disable.S -index cff5b9cec..03fc28a39 100644 ---- a/libgcc/config/aarch64/__arm_za_disable.S -+++ b/libgcc/config/aarch64/__arm_za_disable.S -@@ -63,3 +63,8 @@ ENTRY (__arm_za_disable) - L(end): - ret - END (__arm_za_disable) -+ -+/* Hidden alias used by the unwinder. */ -+.global __libgcc_arm_za_disable -+.hidden __libgcc_arm_za_disable -+.set __libgcc_arm_za_disable, __arm_za_disable -diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h -index 40b22d3c2..bfa695dcb 100644 ---- a/libgcc/config/aarch64/aarch64-unwind.h -+++ b/libgcc/config/aarch64/aarch64-unwind.h -@@ -87,4 +87,20 @@ aarch64_frob_update_context (struct _Unwind_Context *context, - return; - } - -+/* SME runtime function local to libgcc, streaming compatible -+ and preserves more registers than the base PCS requires, but -+ we don't rely on that here. */ -+__attribute__ ((visibility ("hidden"))) -+void __libgcc_arm_za_disable (void); -+ -+/* Disable the SME ZA state in case an unwound frame used the ZA -+ lazy saving scheme. 
*/ -+#undef _Unwind_Frames_Extra -+#define _Unwind_Frames_Extra(x) \ -+ do \ -+ { \ -+ __libgcc_arm_za_disable (); \ -+ } \ -+ while (0) -+ - #endif /* defined AARCH64_UNWIND_H && defined __ILP32__ */ --- -2.33.0 - diff --git a/0220-Backport-SME-libgcc-Fix-config.in.patch b/0220-Backport-SME-libgcc-Fix-config.in.patch deleted file mode 100644 index 58b57e1..0000000 --- a/0220-Backport-SME-libgcc-Fix-config.in.patch +++ /dev/null @@ -1,51 +0,0 @@ -From b20b75158d1230a8b6cbabb36e3b128cbd9ec86f Mon Sep 17 00:00:00 2001 -From: Szabolcs Nagy -Date: Fri, 8 Dec 2023 12:22:54 +0000 -Subject: [PATCH 121/157] [Backport][SME] libgcc: Fix config.in - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47575ec9edcd3078f066aa54ba428420be796bef - -It was updated incorrectly in - - commit dbbfb52b0e9c66ee9d05b8fd17c4f44655e48463 - Author: Szabolcs Nagy - CommitDate: 2023-12-08 11:29:06 +0000 - - libgcc: aarch64: Configure check for __getauxval - -so regenerate it. - -libgcc/ChangeLog: - - * config.in: Regenerate. ---- - libgcc/config.in | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/libgcc/config.in b/libgcc/config.in -index 441d4d39b..8f7dd437b 100644 ---- a/libgcc/config.in -+++ b/libgcc/config.in -@@ -16,9 +16,6 @@ - /* Define to 1 if the assembler supports .variant_pcs. */ - #undef HAVE_AS_VARIANT_PCS - --/* Define to 1 if __getauxval is available. */ --#undef HAVE___GETAUXVAL -- - /* Define to 1 if the target assembler supports thread-local storage. */ - #undef HAVE_CC_TLS - -@@ -67,6 +64,9 @@ - /* Define to 1 if you have the header file. */ - #undef HAVE_UNISTD_H - -+/* Define to 1 if __getauxval is available. */ -+#undef HAVE___GETAUXVAL -+ - /* Define to the address where bug reports for this package should be sent. 
*/ - #undef PACKAGE_BUGREPORT - --- -2.33.0 - diff --git a/0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch b/0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch deleted file mode 100644 index 2bd3268..0000000 --- a/0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 0214ca06a182481851ed90aae21f460f87d26084 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sun, 10 Dec 2023 19:46:05 +0000 -Subject: [PATCH 122/157] [Backport][SME] aarch64: Add -funwind-tables to some - tests - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=02ecdaab7a50f4505fd905effb6d238d773dc813 - -The .cfi scans in these tests failed for *-elf targets because -those targets don't enable .eh_frame info by default. - -gcc/testsuite/ - * gcc.target/aarch64/sme/call_sm_switch_1.c: Add -funwind-tables. - * gcc.target/aarch64/sme/call_sm_switch_3.c: Likewise. - * gcc.target/aarch64/sme/call_sm_switch_5.c: Likewise. ---- - gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c | 2 +- - gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c | 2 +- - gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 2 +- - 3 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c -index a2de55773..98922aaea 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c -@@ -1,4 +1,4 @@ --// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } - // { dg-final { check-function-bodies "**" "" } } - - void ns_callee (); -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c -index ed999d085..4250fe798 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_3.c -@@ -1,4 +1,4 @@ --// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } - // { dg-final { check-function-bodies "**" "" } } - - __attribute__((aarch64_vector_pcs)) void ns_callee (); -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -index be9b5cc04..e3d9bc274 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -@@ -1,4 +1,4 @@ --// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls" } -+// { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } - // { dg-final { check-function-bodies "**" "" } } - - #include --- -2.33.0 - diff --git a/0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch b/0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch deleted file mode 100644 index 0b0dbd9..0000000 --- a/0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch +++ /dev/null @@ -1,106 +0,0 @@ -From cc2e901eccd40992432f74270a9ebc1b708b6eb1 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sun, 10 Dec 2023 19:46:05 +0000 -Subject: [PATCH 123/157] [Backport][SME] aarch64: Skip some SME register save - tests on BE - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=23ea0bc2cf042d74c4adfe26a57cf96b1d837a91 - -Big-endian targets need to save Z8-Z15 in the same order as -the registers would appear for D8-D15, because the layout is -mandated by the EH ABI. BE targets therefore use ST1D instead -of the normal STR for those registers (but not for others). - -That difference is already tested elsewhere and isn't important -for the SME tests. This patch therefore restricts the affected -tests to LE. 
- -gcc/testsuite/ - * gcc.target/aarch64/sme/call_sm_switch_5.c: Restrict tests that - contain Z8-Z23 saves to little-endian. - * gcc.target/aarch64/sme/call_sm_switch_8.c: Likewise. - * gcc.target/aarch64/sme/locally_streaming_1.c: Likewise. ---- - gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c | 6 +++--- - gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c | 6 +++--- - gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c | 2 +- - 3 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -index e3d9bc274..6238ab80d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_5.c -@@ -14,7 +14,7 @@ struct callbacks { - }; - - /* --** n_caller: { target lp64 } -+** n_caller: { target { lp64 && aarch64_little_endian } } - ** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! - ** cntd x16 - ** str x16, \[sp, #?16\] -@@ -114,7 +114,7 @@ n_caller (struct callbacks *c) - } - - /* --** s_caller: { target lp64 } -+** s_caller: { target { lp64 && aarch64_little_endian } } - ** stp x30, (x19|x2[0-8]), \[sp, #?-32\]! - ** cntd x16 - ** str x16, \[sp, #?16\] -@@ -214,7 +214,7 @@ s_caller (struct callbacks *c) [[arm::streaming]] - } - - /* --** sc_caller: -+** sc_caller: { target aarch64_little_endian } - ** stp x29, x30, \[sp, #?-32\]! - ** mov x29, sp - ** cntd x16 -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c -index f44724df3..c909b34ff 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_8.c -@@ -7,7 +7,7 @@ svint8_t produce_z0 (); - void consume_z0 (svint8_t); - - /* --** test_z0: -+** test_z0: { target aarch64_little_endian } - ** ... 
- ** smstop sm - ** bl produce_z0 -@@ -32,7 +32,7 @@ svint8x4_t produce_z3 (); - void consume_z3 (svint8x4_t); - - /* --** test_z3: -+** test_z3: { target aarch64_little_endian } - ** ... - ** smstop sm - ** bl produce_z3 -@@ -61,7 +61,7 @@ svbool_t produce_p0 (); - void consume_p0 (svbool_t); - - /* --** test_p0: -+** test_p0: { target aarch64_little_endian } - ** ... - ** smstop sm - ** bl produce_p0 -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c -index 20ff4b87d..4bb637f47 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/locally_streaming_1.c -@@ -265,7 +265,7 @@ n_ls_vector_pcs () - } - - /* --** n_ls_sve_pcs: -+** n_ls_sve_pcs: { target aarch64_little_endian } - ** sub sp, sp, #?16 - ** cntd x16 - ** str x16, \[sp\] --- -2.33.0 - diff --git a/0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch b/0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch deleted file mode 100644 index edbbd5a..0000000 --- a/0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch +++ /dev/null @@ -1,37 +0,0 @@ -From ab7a2c3b74c65d62d661621c56ef984cfb72f985 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Sep 2022 11:32:50 +0100 -Subject: [PATCH 124/157] [Backport][SME] Add OPTIONS_H_EXTRA to GTFILES - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1e1fa054970a30844eb94d726b4954dcb8b9063 - -I have a patch that adds a typedef to aarch64's -opts.h. -The typedef is used for a TargetVariable in the .opt file, -which means that it is covered by PCH and so needs to be -visible to gengtype. - --opts.h is not included directly in tm.h, but indirectly -by target headers (in this case aarch64.h). There was therefore -nothing that caused it to be added to GTFILES. - -gcc/ - * Makefile.in (GTFILES): Add OPTIONS_H_EXTRA. 
---- - gcc/Makefile.in | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/Makefile.in b/gcc/Makefile.in -index 5cd838270..fcfa54697 100644 ---- a/gcc/Makefile.in -+++ b/gcc/Makefile.in -@@ -2648,6 +2648,7 @@ s-match: build/genmatch$(build_exeext) $(srcdir)/match.pd cfn-operators.pd - - GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ - $(host_xm_file_list) \ -+ $(OPTIONS_H_EXTRA) \ - $(tm_file_list) $(HASHTAB_H) $(SPLAY_TREE_H) $(srcdir)/bitmap.h \ - $(srcdir)/wide-int.h $(srcdir)/alias.h \ - $(srcdir)/coverage.cc $(srcdir)/rtl.h \ --- -2.33.0 - diff --git a/0224-Backport-SME-aarch64-Add-V1DI-mode.patch b/0224-Backport-SME-aarch64-Add-V1DI-mode.patch deleted file mode 100644 index 401391c..0000000 --- a/0224-Backport-SME-aarch64-Add-V1DI-mode.patch +++ /dev/null @@ -1,177 +0,0 @@ -From 21f9190106f8324be42e3e8e0510467386dd68a0 Mon Sep 17 00:00:00 2001 -From: Andrew Carlotti -Date: Fri, 15 Jul 2022 15:25:53 +0100 -Subject: [PATCH 125/157] [Backport][SME] aarch64: Add V1DI mode - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ba864c5d11a1c20891a1e054cb7814ec23de5c9 - -We already have a V1DF mode, so this makes the vector modes more consistent. - -Additionally, this allows us to recognise uint64x1_t and int64x1_t types given -only the mode and type qualifiers (e.g. in aarch64_lookup_simd_builtin_type). - -gcc/ChangeLog: - - * config/aarch64/aarch64-builtins.cc - (v1di_UP): Add V1DI mode to _UP macros. - * config/aarch64/aarch64-modes.def (VECTOR_MODE): Add V1DI mode. - * config/aarch64/aarch64-simd-builtin-types.def: Use V1DI mode. - * config/aarch64/aarch64-simd.md - (vec_extractv2dfv1df): Replace with... - (vec_extract): ...this. - * config/aarch64/aarch64.cc - (aarch64_classify_vector_mode): Add V1DI mode. - * config/aarch64/iterators.md - (VQ_2E, V1HALF, V1half): New. - (nunits): Add V1DI mode. 
---- - gcc/config/aarch64/aarch64-builtins.cc | 1 + - gcc/config/aarch64/aarch64-modes.def | 1 + - gcc/config/aarch64/aarch64-simd-builtin-types.def | 6 +++--- - gcc/config/aarch64/aarch64-simd.md | 14 +++++++------- - gcc/config/aarch64/aarch64.cc | 2 +- - gcc/config/aarch64/iterators.md | 14 ++++++++++++-- - 6 files changed, 25 insertions(+), 13 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc -index 015e9d975..37bb3af48 100644 ---- a/gcc/config/aarch64/aarch64-builtins.cc -+++ b/gcc/config/aarch64/aarch64-builtins.cc -@@ -55,6 +55,7 @@ - #define v2si_UP E_V2SImode - #define v2sf_UP E_V2SFmode - #define v1df_UP E_V1DFmode -+#define v1di_UP E_V1DImode - #define di_UP E_DImode - #define df_UP E_DFmode - #define v16qi_UP E_V16QImode -diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def -index 8fa66fdb3..dd74da4b3 100644 ---- a/gcc/config/aarch64/aarch64-modes.def -+++ b/gcc/config/aarch64/aarch64-modes.def -@@ -70,6 +70,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ - VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ - VECTOR_MODES (FLOAT, 8); /* V2SF. */ - VECTOR_MODES (FLOAT, 16); /* V4SF V2DF. */ -+VECTOR_MODE (INT, DI, 1); /* V1DI. */ - VECTOR_MODE (FLOAT, DF, 1); /* V1DF. */ - VECTOR_MODE (FLOAT, HF, 2); /* V2HF. 
*/ - -diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def -index 248e51e96..405455814 100644 ---- a/gcc/config/aarch64/aarch64-simd-builtin-types.def -+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def -@@ -24,7 +24,7 @@ - ENTRY (Int16x8_t, V8HI, none, 11) - ENTRY (Int32x2_t, V2SI, none, 11) - ENTRY (Int32x4_t, V4SI, none, 11) -- ENTRY (Int64x1_t, DI, none, 11) -+ ENTRY (Int64x1_t, V1DI, none, 11) - ENTRY (Int64x2_t, V2DI, none, 11) - ENTRY (Uint8x8_t, V8QI, unsigned, 11) - ENTRY (Uint8x16_t, V16QI, unsigned, 12) -@@ -32,7 +32,7 @@ - ENTRY (Uint16x8_t, V8HI, unsigned, 12) - ENTRY (Uint32x2_t, V2SI, unsigned, 12) - ENTRY (Uint32x4_t, V4SI, unsigned, 12) -- ENTRY (Uint64x1_t, DI, unsigned, 12) -+ ENTRY (Uint64x1_t, V1DI, unsigned, 12) - ENTRY (Uint64x2_t, V2DI, unsigned, 12) - ENTRY (Poly8_t, QI, poly, 9) - ENTRY (Poly16_t, HI, poly, 10) -@@ -42,7 +42,7 @@ - ENTRY (Poly8x16_t, V16QI, poly, 12) - ENTRY (Poly16x4_t, V4HI, poly, 12) - ENTRY (Poly16x8_t, V8HI, poly, 12) -- ENTRY (Poly64x1_t, DI, poly, 12) -+ ENTRY (Poly64x1_t, V1DI, poly, 12) - ENTRY (Poly64x2_t, V2DI, poly, 12) - ENTRY (Float16x4_t, V4HF, none, 13) - ENTRY (Float16x8_t, V8HF, none, 13) -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index 62493cdfa..04592fc90 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -8326,16 +8326,16 @@ - }) - - ;; Extract a single-element 64-bit vector from one half of a 128-bit vector. --(define_expand "vec_extractv2dfv1df" -- [(match_operand:V1DF 0 "register_operand") -- (match_operand:V2DF 1 "register_operand") -+(define_expand "vec_extract" -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_2E 1 "register_operand") - (match_operand 2 "immediate_operand")] - "TARGET_SIMD" - { -- /* V1DF is rarely used by other patterns, so it should be better to hide -- it in a subreg destination of a normal DF op. 
*/ -- rtx scalar0 = gen_lowpart (DFmode, operands[0]); -- emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2])); -+ /* V1DI and V1DF are rarely used by other patterns, so it should be better -+ to hide it in a subreg destination of a normal DI or DF op. */ -+ rtx scalar0 = gen_lowpart (mode, operands[0]); -+ emit_insn (gen_vec_extract (scalar0, operands[1], operands[2])); - DONE; - }) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index b8e540b6e..f7285555b 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -4117,7 +4117,7 @@ aarch64_classify_vector_mode (machine_mode mode) - case E_V8QImode: - case E_V4HImode: - case E_V2SImode: -- /* ...E_V1DImode doesn't exist. */ -+ case E_V1DImode: - case E_V4HFmode: - case E_V4BFmode: - case E_V2SFmode: -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index 152d28f6b..94db8c53f 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -138,6 +138,9 @@ - ;; VQ without 2 element modes. - (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) - -+;; 2 element quad vector modes. -+(define_mode_iterator VQ_2E [V2DI V2DF]) -+ - ;; BFmode vector modes. - (define_mode_iterator VBF [V4BF V8BF]) - -@@ -1116,12 +1119,13 @@ - (define_mode_attr nunits [(V8QI "8") (V16QI "16") - (V4HI "4") (V8HI "8") - (V2SI "2") (V4SI "4") -- (V2DI "2") (V8DI "8") -+ (V1DI "1") (V2DI "2") - (V4HF "4") (V8HF "8") - (V4BF "4") (V8BF "8") - (V2SF "2") (V4SF "4") - (V1DF "1") (V2DF "2") -- (DI "1") (DF "1")]) -+ (DI "1") (DF "1") -+ (V8DI "8")]) - - ;; Map a mode to the number of bits in it, if the size of the mode - ;; is constant. -@@ -1501,6 +1505,12 @@ - (V2DI "di") (V2SF "sf") - (V4SF "v2sf") (V2DF "df")]) - -+;; Single-element half modes of quad vector modes. 
-+(define_mode_attr V1HALF [(V2DI "V1DI") (V2DF "V1DF")]) -+ -+;; Single-element half modes of quad vector modes, in lower-case -+(define_mode_attr V1half [(V2DI "v1di") (V2DF "v1df")]) -+ - ;; Double modes of vector modes. - (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") - (V4HF "V8HF") (V4BF "V8BF") --- -2.33.0 - diff --git a/0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch b/0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch deleted file mode 100644 index fa5b887..0000000 --- a/0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch +++ /dev/null @@ -1,217 +0,0 @@ -From eaea26e2218ee61a9be0e2933548c752167dcdb5 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Fri, 10 Nov 2023 15:46:21 +0000 -Subject: [PATCH 126/157] [Backport][SME] Allow md iterators to include other - iterators - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5dbaf4851bbf56b6176dca1f1e7d38a16b5b84ee - -This patch allows an .md iterator to include the contents of -previous iterators, possibly with an extra condition attached. - -Too much indirection might become hard to follow, so for the -AArch64 changes I tried to stick to things that seemed likely -to be uncontroversial: - -(a) structure iterators that combine modes for different sizes - and vector counts - -(b) iterators that explicitly duplicate another iterator - (for iterating over the cross product) - -gcc/ - * read-rtl.cc (md_reader::read_mapping): Allow iterators to - include other iterators. - * doc/md.texi: Document the change. - * config/aarch64/iterators.md (DREG2, VQ2, TX2, DX2, SX2): Include - the iterator that is being duplicated, rather than reproducing it. - (VSTRUCT_D): Redefine using VSTRUCT_[234]D. - (VSTRUCT_Q): Likewise VSTRUCT_[234]Q. - (VSTRUCT_2QD, VSTRUCT_3QD, VSTRUCT_4QD, VSTRUCT_QD): Redefine using - the individual D and Q iterators. 
---- - gcc/config/aarch64/iterators.md | 58 ++++++++------------------------- - gcc/doc/md.texi | 13 ++++++++ - gcc/read-rtl.cc | 21 ++++++++++-- - 3 files changed, 46 insertions(+), 46 deletions(-) - -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index 94db8c53f..a1659dfba 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -106,7 +106,7 @@ - (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) - - ;; Copy of the above. --(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF]) -+(define_mode_iterator DREG2 [DREG]) - - ;; All modes suitable to store/load pair (2 elements) using STP/LDP. - (define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF]) -@@ -121,7 +121,7 @@ - (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF]) - - ;; Copy of the above. --(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) -+(define_mode_iterator VQ2 [VQ]) - - ;; Quad vector modes suitable for moving. Includes BFmode. - (define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) -@@ -321,14 +321,6 @@ - ;; Advanced SIMD opaque structure modes. - (define_mode_iterator VSTRUCT [OI CI XI]) - --;; Advanced SIMD 64-bit vector structure modes. --(define_mode_iterator VSTRUCT_D [V2x8QI V2x4HI V2x2SI V2x1DI -- V2x4HF V2x2SF V2x1DF V2x4BF -- V3x8QI V3x4HI V3x2SI V3x1DI -- V3x4HF V3x2SF V3x1DF V3x4BF -- V4x8QI V4x4HI V4x2SI V4x1DI -- V4x4HF V4x2SF V4x1DF V4x4BF]) -- - ;; Advanced SIMD 64-bit 2-vector structure modes. - (define_mode_iterator VSTRUCT_2D [V2x8QI V2x4HI V2x2SI V2x1DI - V2x4HF V2x2SF V2x1DF V2x4BF]) -@@ -341,6 +333,9 @@ - (define_mode_iterator VSTRUCT_4D [V4x8QI V4x4HI V4x2SI V4x1DI - V4x4HF V4x2SF V4x1DF V4x4BF]) - -+;; Advanced SIMD 64-bit vector structure modes. -+(define_mode_iterator VSTRUCT_D [VSTRUCT_2D VSTRUCT_3D VSTRUCT_4D]) -+ - ;; Advanced SIMD 64-bit 2-vector structure modes minus V2x1DI and V2x1DF. 
- (define_mode_iterator VSTRUCT_2DNX [V2x8QI V2x4HI V2x2SI V2x4HF - V2x2SF V2x4BF]) -@@ -365,14 +360,6 @@ - ;; Advanced SIMD 64-bit 4-vector structure modes with 64-bit elements. - (define_mode_iterator VSTRUCT_4DX [V4x1DI V4x1DF]) - --;; Advanced SIMD 128-bit vector structure modes. --(define_mode_iterator VSTRUCT_Q [V2x16QI V2x8HI V2x4SI V2x2DI -- V2x8HF V2x4SF V2x2DF V2x8BF -- V3x16QI V3x8HI V3x4SI V3x2DI -- V3x8HF V3x4SF V3x2DF V3x8BF -- V4x16QI V4x8HI V4x4SI V4x2DI -- V4x8HF V4x4SF V4x2DF V4x8BF]) -- - ;; Advanced SIMD 128-bit 2-vector structure modes. - (define_mode_iterator VSTRUCT_2Q [V2x16QI V2x8HI V2x4SI V2x2DI - V2x8HF V2x4SF V2x2DF V2x8BF]) -@@ -385,49 +372,32 @@ - (define_mode_iterator VSTRUCT_4Q [V4x16QI V4x8HI V4x4SI V4x2DI - V4x8HF V4x4SF V4x2DF V4x8BF]) - -+;; Advanced SIMD 128-bit vector structure modes. -+(define_mode_iterator VSTRUCT_Q [VSTRUCT_2Q VSTRUCT_3Q VSTRUCT_4Q]) -+ - ;; Advanced SIMD 2-vector structure modes. --(define_mode_iterator VSTRUCT_2QD [V2x8QI V2x4HI V2x2SI V2x1DI -- V2x4HF V2x2SF V2x1DF V2x4BF -- V2x16QI V2x8HI V2x4SI V2x2DI -- V2x8HF V2x4SF V2x2DF V2x8BF]) -+(define_mode_iterator VSTRUCT_2QD [VSTRUCT_2D VSTRUCT_2Q]) - - ;; Advanced SIMD 3-vector structure modes. --(define_mode_iterator VSTRUCT_3QD [V3x8QI V3x4HI V3x2SI V3x1DI -- V3x4HF V3x2SF V3x1DF V3x4BF -- V3x16QI V3x8HI V3x4SI V3x2DI -- V3x8HF V3x4SF V3x2DF V3x8BF]) -+(define_mode_iterator VSTRUCT_3QD [VSTRUCT_3D VSTRUCT_3Q]) - - ;; Advanced SIMD 4-vector structure modes. --(define_mode_iterator VSTRUCT_4QD [V4x8QI V4x4HI V4x2SI V4x1DI -- V4x4HF V4x2SF V4x1DF V4x4BF -- V4x16QI V4x8HI V4x4SI V4x2DI -- V4x8HF V4x4SF V4x2DF V4x8BF]) -+(define_mode_iterator VSTRUCT_4QD [VSTRUCT_4D VSTRUCT_4Q]) - - ;; Advanced SIMD vector structure modes. 
--(define_mode_iterator VSTRUCT_QD [V2x8QI V2x4HI V2x2SI V2x1DI -- V2x4HF V2x2SF V2x1DF V2x4BF -- V3x8QI V3x4HI V3x2SI V3x1DI -- V3x4HF V3x2SF V3x1DF V3x4BF -- V4x8QI V4x4HI V4x2SI V4x1DI -- V4x4HF V4x2SF V4x1DF V4x4BF -- V2x16QI V2x8HI V2x4SI V2x2DI -- V2x8HF V2x4SF V2x2DF V2x8BF -- V3x16QI V3x8HI V3x4SI V3x2DI -- V3x8HF V3x4SF V3x2DF V3x8BF -- V4x16QI V4x8HI V4x4SI V4x2DI -- V4x8HF V4x4SF V4x2DF V4x8BF]) -+(define_mode_iterator VSTRUCT_QD [VSTRUCT_D VSTRUCT_Q]) - - ;; Double scalar modes - (define_mode_iterator DX [DI DF DD]) - - ;; Duplicate of the above --(define_mode_iterator DX2 [DI DF DD]) -+(define_mode_iterator DX2 [DX]) - - ;; Single scalar modes - (define_mode_iterator SX [SI SF]) - - ;; Duplicate of the above --(define_mode_iterator SX2 [SI SF]) -+(define_mode_iterator SX2 [SX]) - - ;; Single and double integer and float modes - (define_mode_iterator DSX [DF DI SF SI]) -diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi -index 04ace8f7f..c0cf0ec64 100644 ---- a/gcc/doc/md.texi -+++ b/gcc/doc/md.texi -@@ -11561,6 +11561,19 @@ to @code{@var{mode}}. For example: - means that the @code{:DI} expansion only applies if @code{TARGET_64BIT} - but that the @code{:SI} expansion has no such constraint. - -+It is also possible to include iterators in other iterators. For example: -+ -+@smallexample -+(define_mode_iterator VI [V16QI V8HI V4SI V2DI]) -+(define_mode_iterator VF [V8HF V4SF (V2DF "TARGET_DOUBLE")]) -+(define_mode_iterator V [VI (VF "TARGET_FLOAT")]) -+@end smallexample -+ -+makes @samp{:V} iterate over the modes in @code{VI} and the modes -+in @code{VF}. When a construct uses @samp{:V}, the @code{V8HF} and -+@code{V4SF} expansions require @samp{TARGET_FLOAT} while the @code{V2DF} -+expansion requires @samp{TARGET_DOUBLE && TARGET_FLOAT}. -+ - Iterators are applied in the order they are defined. This can be - significant if two iterators are used in a construct that requires - substitutions. @xref{Substitutions}. 
-diff --git a/gcc/read-rtl.cc b/gcc/read-rtl.cc -index 798d24859..cdfa9e7b8 100644 ---- a/gcc/read-rtl.cc -+++ b/gcc/read-rtl.cc -@@ -1261,8 +1261,25 @@ md_reader::read_mapping (struct iterator_group *group, htab_t table) - string = read_string (false); - require_char_ws (')'); - } -- number = group->find_builtin (name.string); -- end_ptr = add_map_value (end_ptr, number, string); -+ auto *subm = (struct mapping *) htab_find (group->iterators, -+ &name.string); -+ if (subm) -+ { -+ if (m == subm) -+ fatal_with_file_and_line ("recursive definition of `%s'", -+ name.string); -+ for (map_value *v = subm->values; v; v = v->next) -+ { -+ auto *joined = rtx_reader_ptr->join_c_conditions (v->string, -+ string); -+ end_ptr = add_map_value (end_ptr, v->number, joined); -+ } -+ } -+ else -+ { -+ number = group->find_builtin (name.string); -+ end_ptr = add_map_value (end_ptr, number, string); -+ } - c = read_skip_spaces (); - } - while (c != ']'); --- -2.33.0 - diff --git a/0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch b/0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch deleted file mode 100644 index 94d1277..0000000 --- a/0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch +++ /dev/null @@ -1,142 +0,0 @@ -From 637e6469f2225b6f6f6b0c84b4e7abcd8dfd7ca4 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Christoph=20M=C3=BCllner?= -Date: Wed, 28 Sep 2022 11:19:06 +0200 -Subject: [PATCH 127/157] [Backport][SME] riscv: Add support for strlen inline - expansion -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df48285b2484eb4f8e0570c566677114eb0e553a - -Note: Only introduce the definitions of function -emit_likely_jump_insn and emit_unlikely_jump_insn, -and drop others. - -This patch implements the expansion of the strlen builtin for RV32/RV64 -for xlen-aligned aligned strings if Zbb or XTheadBb instructions are available. 
-The inserted sequences are: - -rv32gc_zbb (RV64 is similar): - add a3,a0,4 - li a4,-1 -.L1: lw a5,0(a0) - add a0,a0,4 - orc.b a5,a5 - beq a5,a4,.L1 - not a5,a5 - ctz a5,a5 - srl a5,a5,0x3 - add a0,a0,a5 - sub a0,a0,a3 - -rv64gc_xtheadbb (RV32 is similar): - add a4,a0,8 -.L2: ld a5,0(a0) - add a0,a0,8 - th.tstnbz a5,a5 - beqz a5,.L2 - th.rev a5,a5 - th.ff1 a5,a5 - srl a5,a5,0x3 - add a0,a0,a5 - sub a0,a0,a4 - -This allows to inline calls to strlen(), with optimized code for -xlen-aligned strings, resulting in the following benefits over -a call to libc: -* no call/ret instructions -* no stack frame allocation -* no register saving/restoring -* no alignment test - -The inlining mechanism is gated by a new switch ('-minline-strlen') -and by the variable 'optimize_size'. - -Tested using the glibc string tests. - -Signed-off-by: Christoph Müllner - -gcc/ChangeLog: - - * config.gcc: Add new object riscv-string.o. - riscv-string.cc. - * config/riscv/riscv-protos.h (riscv_expand_strlen): - New function. - * config/riscv/riscv.md (strlen): New expand INSN. - * config/riscv/riscv.opt: New flag 'minline-strlen'. - * config/riscv/t-riscv: Add new object riscv-string.o. - * config/riscv/thead.md (th_rev2): Export INSN name. - (th_rev2): Likewise. - (th_tstnbz2): New INSN. - * doc/invoke.texi: Document '-minline-strlen'. - * emit-rtl.cc (emit_likely_jump_insn): New helper function. - (emit_unlikely_jump_insn): Likewise. - * rtl.h (emit_likely_jump_insn): New prototype. - (emit_unlikely_jump_insn): Likewise. - * config/riscv/riscv-string.cc: New file. - -gcc/testsuite/ChangeLog: - - * gcc.target/riscv/xtheadbb-strlen-unaligned.c: New test. - * gcc.target/riscv/xtheadbb-strlen.c: New test. - * gcc.target/riscv/zbb-strlen-disabled-2.c: New test. - * gcc.target/riscv/zbb-strlen-disabled.c: New test. - * gcc.target/riscv/zbb-strlen-unaligned.c: New test. - * gcc.target/riscv/zbb-strlen.c: New test. 
---- - gcc/emit-rtl.cc | 24 ++++++++++++++++++++++++ - gcc/rtl.h | 2 ++ - 2 files changed, 26 insertions(+) - -diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc -index 1e02ae254..2df5ff414 100644 ---- a/gcc/emit-rtl.cc -+++ b/gcc/emit-rtl.cc -@@ -5163,6 +5163,30 @@ emit_jump_insn (rtx x) - return last; - } - -+/* Make an insn of code JUMP_INSN with pattern X, -+ add a REG_BR_PROB note that indicates very likely probability, -+ and add it to the end of the doubly-linked list. */ -+ -+rtx_insn * -+emit_likely_jump_insn (rtx x) -+{ -+ rtx_insn *jump = emit_jump_insn (x); -+ add_reg_br_prob_note (jump, profile_probability::very_likely ()); -+ return jump; -+} -+ -+/* Make an insn of code JUMP_INSN with pattern X, -+ add a REG_BR_PROB note that indicates very unlikely probability, -+ and add it to the end of the doubly-linked list. */ -+ -+rtx_insn * -+emit_unlikely_jump_insn (rtx x) -+{ -+ rtx_insn *jump = emit_jump_insn (x); -+ add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); -+ return jump; -+} -+ - /* Make an insn of code CALL_INSN with pattern X - and add it to the end of the doubly-linked list. 
*/ - -diff --git a/gcc/rtl.h b/gcc/rtl.h -index 488016bb4..a0db225cb 100644 ---- a/gcc/rtl.h -+++ b/gcc/rtl.h -@@ -3333,6 +3333,8 @@ extern rtx_note *emit_note_after (enum insn_note, rtx_insn *); - extern rtx_insn *emit_insn (rtx); - extern rtx_insn *emit_debug_insn (rtx); - extern rtx_insn *emit_jump_insn (rtx); -+extern rtx_insn *emit_likely_jump_insn (rtx); -+extern rtx_insn *emit_unlikely_jump_insn (rtx); - extern rtx_insn *emit_call_insn (rtx); - extern rtx_code_label *emit_label (rtx); - extern rtx_jump_table_data *emit_jump_table_data (rtx); --- -2.33.0 - diff --git a/0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch b/0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch deleted file mode 100644 index bdb5966..0000000 --- a/0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch +++ /dev/null @@ -1,189 +0,0 @@ -From 8c6ffb4c6f86231eee318ceeb8546a53037edfe9 Mon Sep 17 00:00:00 2001 -From: Jakub Jelinek -Date: Tue, 4 Oct 2022 23:13:15 +0200 -Subject: [PATCH 128/157] [Backport][SME] attribs: Add overloads with namespace - name - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0764dc8537a4f87089ecd32391cb5f8803b43c96 - -I've discovered a problem with the way we handle scoped attributes. For -declaration or type attributes for attributes we don't know anything about -we just don't add them to the declarations or types, so later in the FEs and -middle-end it is fine to use lookup_attribute etc. which just check the -attribute name and not namespace because non-standard non-GNU attributes -just won't show there. But in the case of attributes on statements, nothing -has filtered out the unknown attributes, so with my earlier assume -attribute patch e.g. c-c++-common/Wno-attributes-6.c test failed because -it uses: -[[vendor::assume(1 + 1 == 2)]]; -with -Wno-attributes=vendor::assume and lookup_attribute ("assume", ) -finds such attribute and handled it that way. 
-So, for those cases, this patch introduces lookup_attribute and -remove_attribute overloads which specify also the namespace. -I think the fallthrough, hot, cold, likely, unlikely attribute handling -will need to use the new APIs too, so that we don't handle -msft::fallthrough attribute as something we'd know. - -2022-10-04 Jakub Jelinek - - * attribs.h (remove_attribute): Declare overload with additional - attr_ns argument. - (private_lookup_attribute): Declare overload with additional - attr_ns and attr_ns_len arguments. - (lookup_attribute): New overload with additional attr_ns argument. - * attribs.cc (remove_attribute): New overload with additional - attr_ns argument. - (private_lookup_attribute): New overload with additional - attr_ns and attr_ns_len arguments. ---- - gcc/attribs.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ - gcc/attribs.h | 38 ++++++++++++++++++++++++++++++ - 2 files changed, 101 insertions(+) - -diff --git a/gcc/attribs.cc b/gcc/attribs.cc -index 656ea739e..8e2696bc5 100644 ---- a/gcc/attribs.cc -+++ b/gcc/attribs.cc -@@ -1598,6 +1598,36 @@ remove_attribute (const char *attr_name, tree list) - return list; - } - -+/* Similarly but also match namespace on the removed attributes. */ -+ -+tree -+remove_attribute (const char *attr_ns, const char *attr_name, tree list) -+{ -+ tree *p; -+ gcc_checking_assert (attr_name[0] != '_'); -+ gcc_checking_assert (attr_ns == NULL || attr_ns[0] != '_'); -+ -+ for (p = &list; *p;) -+ { -+ tree l = *p; -+ -+ tree attr = get_attribute_name (l); -+ if (is_attribute_p (attr_name, attr)) -+ { -+ tree ns = get_attribute_namespace (l); -+ if ((ns == NULL_TREE && attr_ns == NULL) -+ || (ns && attr_ns && is_attribute_p (attr_ns, ns))) -+ { -+ *p = TREE_CHAIN (l); -+ continue; -+ } -+ } -+ p = &TREE_CHAIN (l); -+ } -+ -+ return list; -+} -+ - /* Return an attribute list that is the union of a1 and a2. 
*/ - - tree -@@ -1995,6 +2025,39 @@ private_lookup_attribute (const char *attr_name, size_t attr_len, tree list) - return list; - } - -+/* Similarly but with also attribute namespace. */ -+ -+tree -+private_lookup_attribute (const char *attr_ns, const char *attr_name, -+ size_t attr_ns_len, size_t attr_len, tree list) -+{ -+ while (list) -+ { -+ tree attr = get_attribute_name (list); -+ size_t ident_len = IDENTIFIER_LENGTH (attr); -+ if (cmp_attribs (attr_name, attr_len, IDENTIFIER_POINTER (attr), -+ ident_len)) -+ { -+ tree ns = get_attribute_namespace (list); -+ if (ns == NULL_TREE) -+ { -+ if (attr_ns == NULL) -+ break; -+ } -+ else if (attr_ns) -+ { -+ ident_len = IDENTIFIER_LENGTH (ns); -+ if (cmp_attribs (attr_ns, attr_ns_len, IDENTIFIER_POINTER (ns), -+ ident_len)) -+ break; -+ } -+ } -+ list = TREE_CHAIN (list); -+ } -+ -+ return list; -+} -+ - /* Return true if the function decl or type NODE has been declared - with attribute ANAME among attributes ATTRS. */ - -diff --git a/gcc/attribs.h b/gcc/attribs.h -index 0856f98fb..9ad530fcb 100644 ---- a/gcc/attribs.h -+++ b/gcc/attribs.h -@@ -88,6 +88,10 @@ extern tree merge_type_attributes (tree, tree); - - extern tree remove_attribute (const char *, tree); - -+/* Similarly but also with specific attribute namespace. */ -+ -+extern tree remove_attribute (const char *, const char *, tree); -+ - /* Given two attributes lists, return a list of their union. */ - - extern tree merge_attributes (tree, tree); -@@ -119,6 +123,10 @@ extern int attribute_list_contained (const_tree, const_tree); - for size. 
*/ - extern tree private_lookup_attribute (const char *attr_name, size_t attr_len, - tree list); -+extern tree private_lookup_attribute (const char *attr_ns, -+ const char *attr_name, -+ size_t attr_ns_len, size_t attr_len, -+ tree list); - - extern unsigned decls_mismatched_attributes (tree, tree, tree, - const char* const[], -@@ -215,6 +223,36 @@ lookup_attribute (const char *attr_name, tree list) - } - } - -+/* Similar to lookup_attribute, but also match the attribute namespace. */ -+ -+static inline tree -+lookup_attribute (const char *attr_ns, const char *attr_name, tree list) -+{ -+ if (CHECKING_P && attr_name[0] != '_') -+ { -+ size_t attr_len = strlen (attr_name); -+ gcc_checking_assert (!canonicalize_attr_name (attr_name, attr_len)); -+ } -+ if (CHECKING_P && attr_ns && attr_ns[0] != '_') -+ { -+ size_t attr_ns_len = strlen (attr_ns); -+ gcc_checking_assert (!canonicalize_attr_name (attr_ns, attr_ns_len)); -+ } -+ /* In most cases, list is NULL_TREE. */ -+ if (list == NULL_TREE) -+ return NULL_TREE; -+ else -+ { -+ size_t attr_ns_len = attr_ns ? strlen (attr_ns) : 0; -+ size_t attr_len = strlen (attr_name); -+ /* Do the strlen() before calling the out-of-line implementation. -+ In most cases attr_name is a string constant, and the compiler -+ will optimize the strlen() away. */ -+ return private_lookup_attribute (attr_ns, attr_name, -+ attr_ns_len, attr_len, list); -+ } -+} -+ - /* Given an attribute name ATTR_NAME and a list of attributes LIST, - return a pointer to the attribute's list first element if the attribute - starts with ATTR_NAME. 
ATTR_NAME must be in the form 'text' (not --- -2.33.0 - diff --git a/0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch b/0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch deleted file mode 100644 index 625c065..0000000 --- a/0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 044dc671f7eb723df5b6ce2364d6ae579c0cc984 Mon Sep 17 00:00:00 2001 -From: Martin Jambor -Date: Tue, 30 Aug 2022 18:50:35 +0200 -Subject: [PATCH 129/157] [Backport][SME] vec: Add array_slice constructors - from non-const and gc vectors - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=15433c214df295f2281a90fcf283355b21beca0e - -This patch adds constructors of array_slice that are required to -create them from non-const (heap or auto) vectors or from GC vectors. - -gcc/ChangeLog: - -2022-08-08 Martin Jambor - - * vec.h (array_slice): Add constructors for non-const reference to - heap vector and pointers to heap vectors. ---- - gcc/vec.h | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/gcc/vec.h b/gcc/vec.h -index 3ba7ea7ed..fc3b10c85 100644 ---- a/gcc/vec.h -+++ b/gcc/vec.h -@@ -2264,6 +2264,18 @@ public: - array_slice (const vec &v) - : m_base (v.address ()), m_size (v.length ()) {} - -+ template -+ array_slice (vec &v) -+ : m_base (v.address ()), m_size (v.length ()) {} -+ -+ template -+ array_slice (const vec *v) -+ : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} -+ -+ template -+ array_slice (vec *v) -+ : m_base (v ? v->address () : nullptr), m_size (v ? 
v->length () : 0) {} -+ - iterator begin () { return m_base; } - iterator end () { return m_base + m_size; } - --- -2.33.0 - diff --git a/0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch b/0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch deleted file mode 100644 index 0fd733c..0000000 --- a/0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 12dd36f06e13ee9cd684c00732caa684f49b3610 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 29 Jun 2023 08:48:17 +0100 -Subject: [PATCH 130/157] [Backport][SME] A couple of va_gc_atomic tweaks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4e9f6c14280699997a633cefd3fb315b2bd4762c - -The only current user of va_gc_atomic is Ada's: - - vec - -It uses the generic gt_pch_nx routines (with gt_pch_nx being the -“note pointers” hooks), such as: - - template - void - gt_pch_nx (vec *v) - { - extern void gt_pch_nx (T &); - for (unsigned i = 0; i < v->length (); i++) - gt_pch_nx ((*v)[i]); - } - -It then defines gt_pch_nx routines for Entity_Id &. - -The problem is that if we wanted to take the same approach for -an array of unsigned ints, we'd need to define: - - inline void gt_pch_nx (unsigned int &) { } - -which would then be ambiguous with: - - inline void gt_pch_nx (unsigned int) { } - -The point of va_gc_atomic is that the elements don't need to be GCed, -and so we have: - - template - void - gt_ggc_mx (vec *v ATTRIBUTE_UNUSED) - { - /* Nothing to do. Vectors of atomic types wrt GC do not need to - be traversed. */ - } - -I think it's therefore reasonable to assume that no pointers will -need to be processed for PCH either. - -The patch also relaxes the array_slice constructor for vec * -so that it handles all embedded vectors. - -gcc/ - * vec.h (gt_pch_nx): Add overloads for va_gc_atomic. 
- (array_slice): Relax va_gc constructor to handle all vectors - with a vl_embed layout. - -gcc/ada/ - * gcc-interface/decl.cc (gt_pch_nx): Remove overloads for Entity_Id. ---- - gcc/ada/gcc-interface/decl.cc | 11 ----------- - gcc/vec.h | 22 ++++++++++++++++++---- - 2 files changed, 18 insertions(+), 15 deletions(-) - -diff --git a/gcc/ada/gcc-interface/decl.cc b/gcc/ada/gcc-interface/decl.cc -index 1c7a71684..7193b55c7 100644 ---- a/gcc/ada/gcc-interface/decl.cc -+++ b/gcc/ada/gcc-interface/decl.cc -@@ -163,17 +163,6 @@ struct GTY((for_user)) tree_entity_vec_map - vec *to; - }; - --void --gt_pch_nx (Entity_Id &) --{ --} -- --void --gt_pch_nx (Entity_Id *x, gt_pointer_operator op, void *cookie) --{ -- op (x, NULL, cookie); --} -- - struct dummy_type_hasher : ggc_cache_ptr_hash - { - static inline hashval_t -diff --git a/gcc/vec.h b/gcc/vec.h -index fc3b10c85..592d3f7e0 100644 ---- a/gcc/vec.h -+++ b/gcc/vec.h -@@ -1383,6 +1383,13 @@ gt_pch_nx (vec *v) - gt_pch_nx ((*v)[i]); - } - -+template -+void -+gt_pch_nx (vec *) -+{ -+ /* No pointers to note. */ -+} -+ - template - void - gt_pch_nx (vec *v, gt_pointer_operator op, void *cookie) -@@ -1400,6 +1407,13 @@ gt_pch_nx (vec *v, gt_pointer_operator op, void *cookie) - gt_pch_nx (&((*v)[i]), op, cookie); - } - -+template -+void -+gt_pch_nx (vec *, gt_pointer_operator, void *) -+{ -+ /* No pointers to note. */ -+} -+ - - /* Space efficient vector. These vectors can grow dynamically and are - allocated together with their control data. They are suited to be -@@ -2268,12 +2282,12 @@ public: - array_slice (vec &v) - : m_base (v.address ()), m_size (v.length ()) {} - -- template -- array_slice (const vec *v) -+ template -+ array_slice (const vec *v) - : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {} - -- template -- array_slice (vec *v) -+ template -+ array_slice (vec *v) - : m_base (v ? v->address () : nullptr), m_size (v ? 
v->length () : 0) {} - - iterator begin () { return m_base; } --- -2.33.0 - diff --git a/0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch b/0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch deleted file mode 100644 index 588ea40..0000000 --- a/0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch +++ /dev/null @@ -1,34 +0,0 @@ -From bb15d4c4476e3ba303c5afe0adae0d86ab5f0a9b Mon Sep 17 00:00:00 2001 -From: zhongjuzhe -Date: Mon, 22 Aug 2022 10:15:31 +0100 -Subject: [PATCH 131/157] [Backport][SME] middle-end: Fix issue of poly_uint16 - (1, 1) in self test - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=21e7d87a901d45f0cb5e5510d22bfbdb0d0ac6a1 - -This patch fix issue of poly_uint16 (1, 1) in machine mode self test. - -gcc/ChangeLog: - - * simplify-rtx.cc (test_vector_subregs_fore_back): Make first value - and repeat value different. ---- - gcc/simplify-rtx.cc | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc -index e152918b0..fc79a2e2e 100644 ---- a/gcc/simplify-rtx.cc -+++ b/gcc/simplify-rtx.cc -@@ -8380,7 +8380,7 @@ test_vector_subregs_fore_back (machine_mode inner_mode) - for (unsigned int i = 0; i < count; ++i) - builder.quick_push (gen_int_mode (i, int_mode)); - for (unsigned int i = 0; i < count; ++i) -- builder.quick_push (gen_int_mode (-(int) i, int_mode)); -+ builder.quick_push (gen_int_mode (-1 - (int) i, int_mode)); - rtx x = builder.build (); - - test_vector_subregs_modes (x); --- -2.33.0 - diff --git a/0231-SME-Add-missing-header-file-in-aarch64.cc.patch b/0231-SME-Add-missing-header-file-in-aarch64.cc.patch deleted file mode 100644 index 6aa3d84..0000000 --- a/0231-SME-Add-missing-header-file-in-aarch64.cc.patch +++ /dev/null @@ -1,24 +0,0 @@ -From cce05b3365c3986ca74c04f442662a21b4f03a61 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Mon, 4 Mar 2024 14:39:36 +0800 -Subject: [PATCH 132/157] [SME] Add missing header file in `aarch64.cc` - 
---- - gcc/config/aarch64/aarch64.cc | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index f7285555b..0117a3e12 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -86,6 +86,7 @@ - #include "tree-pass.h" - #include "cfgbuild.h" - #include "symbol-summary.h" -+#include "value-range.h" - #include "ipa-prop.h" - #include "ipa-fnsummary.h" - --- -2.33.0 - diff --git a/0232-Backport-SME-c-Add-support-for-__extension__.patch b/0232-Backport-SME-c-Add-support-for-__extension__.patch deleted file mode 100644 index 5ba9828..0000000 --- a/0232-Backport-SME-c-Add-support-for-__extension__.patch +++ /dev/null @@ -1,327 +0,0 @@ -From 3714cfb47fafef884aa2ff330935fb44b7966909 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 24 Aug 2023 11:49:58 +0100 -Subject: [PATCH 133/157] [Backport][SME] c: Add support for [[__extension__ - ...]] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=207a5daa9dcf31e367152163ad2a2ab4a0858967 - -[[]] attributes are a recent addition to C, but as a GNU extension, -GCC allows them to be used in C11 and earlier. Normally this use -would trigger a pedwarn (for -pedantic, -Wc11-c2x-compat, etc.). - -This patch allows the pedwarn to be suppressed by starting the -attribute-list with __extension__. - -Also, :: is not a single lexing token prior to C2X, so it wasn't -possible to use scoped attributes in C11, even as a GNU extension. -The patch allows two colons to be used in place of :: when -__extension__ is used. No attempt is made to check whether the -two colons are immediately adjacent. - -gcc/ - * doc/extend.texi: Document the C [[__extension__ ...]] construct. - -gcc/c/ - * c-parser.cc (c_parser_std_attribute): Conditionally allow - two colons to be used in place of ::. - (c_parser_std_attribute_list): New function, split out from... - (c_parser_std_attribute_specifier): ...here. 
Allow the attribute-list - to start with __extension__. When it does, also allow two colons - to be used in place of ::. - -gcc/testsuite/ - * gcc.dg/c2x-attr-syntax-6.c: New test. - * gcc.dg/c2x-attr-syntax-7.c: Likewise. ---- - gcc/c/c-parser.cc | 64 ++++++++++++++++++------ - gcc/doc/extend.texi | 27 ++++++++-- - gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 62 +++++++++++++++++++++++ - gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 60 ++++++++++++++++++++++ - 4 files changed, 193 insertions(+), 20 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c - create mode 100644 gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c - -diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc -index 78a313fe3..486f46e1c 100644 ---- a/gcc/c/c-parser.cc -+++ b/gcc/c/c-parser.cc -@@ -4894,10 +4894,18 @@ c_parser_balanced_token_sequence (c_parser *parser) - ( balanced-token-sequence[opt] ) - - Keywords are accepted as identifiers for this purpose. --*/ -+ -+ As an extension, we permit an attribute-specifier to be: -+ -+ [ [ __extension__ attribute-list ] ] -+ -+ Two colons are then accepted as a synonym for ::. No attempt is made -+ to check whether the colons are immediately adjacent. LOOSE_SCOPE_P -+ indicates whether this relaxation is in effect. 
*/ - - static tree --c_parser_std_attribute (c_parser *parser, bool for_tm) -+c_parser_std_attribute (c_parser *parser, bool for_tm, -+ bool loose_scope_p = false) - { - c_token *token = c_parser_peek_token (parser); - tree ns, name, attribute; -@@ -4910,9 +4918,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) - } - name = canonicalize_attr_name (token->value); - c_parser_consume_token (parser); -- if (c_parser_next_token_is (parser, CPP_SCOPE)) -+ if (c_parser_next_token_is (parser, CPP_SCOPE) -+ || (loose_scope_p -+ && c_parser_next_token_is (parser, CPP_COLON) -+ && c_parser_peek_2nd_token (parser)->type == CPP_COLON)) - { - ns = name; -+ if (c_parser_next_token_is (parser, CPP_COLON)) -+ c_parser_consume_token (parser); - c_parser_consume_token (parser); - token = c_parser_peek_token (parser); - if (token->type != CPP_NAME && token->type != CPP_KEYWORD) -@@ -4981,19 +4994,9 @@ c_parser_std_attribute (c_parser *parser, bool for_tm) - } - - static tree --c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) -+c_parser_std_attribute_list (c_parser *parser, bool for_tm, -+ bool loose_scope_p = false) - { -- location_t loc = c_parser_peek_token (parser)->location; -- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) -- return NULL_TREE; -- if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) -- { -- c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); -- return NULL_TREE; -- } -- if (!for_tm) -- pedwarn_c11 (loc, OPT_Wpedantic, -- "ISO C does not support %<[[]]%> attributes before C2X"); - tree attributes = NULL_TREE; - while (true) - { -@@ -5005,7 +5008,7 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) - c_parser_consume_token (parser); - continue; - } -- tree attribute = c_parser_std_attribute (parser, for_tm); -+ tree attribute = c_parser_std_attribute (parser, for_tm, loose_scope_p); - if (attribute != error_mark_node) - { - TREE_CHAIN (attribute) = attributes; -@@ 
-5014,6 +5017,35 @@ c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) - if (c_parser_next_token_is_not (parser, CPP_COMMA)) - break; - } -+ return attributes; -+} -+ -+static tree -+c_parser_std_attribute_specifier (c_parser *parser, bool for_tm) -+{ -+ location_t loc = c_parser_peek_token (parser)->location; -+ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) -+ return NULL_TREE; -+ if (!c_parser_require (parser, CPP_OPEN_SQUARE, "expected %<[%>")) -+ { -+ c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); -+ return NULL_TREE; -+ } -+ tree attributes; -+ if (c_parser_next_token_is_keyword (parser, RID_EXTENSION)) -+ { -+ auto ext = disable_extension_diagnostics (); -+ c_parser_consume_token (parser); -+ attributes = c_parser_std_attribute_list (parser, for_tm, true); -+ restore_extension_diagnostics (ext); -+ } -+ else -+ { -+ if (!for_tm) -+ pedwarn_c11 (loc, OPT_Wpedantic, -+ "ISO C does not support %<[[]]%> attributes before C2X"); -+ attributes = c_parser_std_attribute_list (parser, for_tm); -+ } - c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); - c_parser_skip_until_found (parser, CPP_CLOSE_SQUARE, "expected %<]%>"); - return nreverse (attributes); -diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi -index 674db2f1a..3cfecee53 100644 ---- a/gcc/doc/extend.texi -+++ b/gcc/doc/extend.texi -@@ -11726,10 +11726,29 @@ macros to replace them with the customary keywords. It looks like this: - @findex __extension__ - @opindex pedantic - @option{-pedantic} and other options cause warnings for many GNU C extensions. --You can --prevent such warnings within one expression by writing --@code{__extension__} before the expression. @code{__extension__} has no --effect aside from this. -+You can suppress such warnings using the keyword @code{__extension__}. 
-+Specifically: -+ -+@itemize @bullet -+@item -+Writing @code{__extension__} before an expression prevents warnings -+about extensions within that expression. -+ -+@item -+In C, writing: -+ -+@smallexample -+[[__extension__ @dots{}]] -+@end smallexample -+ -+suppresses warnings about using @samp{[[]]} attributes in C versions -+that predate C2X@. Since the scope token @samp{::} is not a single -+lexing token in earlier versions of C, this construct also allows two colons -+to be used in place of @code{::}. GCC does not check whether the two -+colons are immediately adjacent. -+@end itemize -+ -+@code{__extension__} has no effect aside from this. - - @node Incomplete Enums - @section Incomplete @code{enum} Types -diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c -new file mode 100644 -index 000000000..9e5f65ce4 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c -@@ -0,0 +1,62 @@ -+/* Test C2x attribute syntax: use of __extension__ in C11 mode. 
*/ -+/* { dg-do compile } */ -+/* { dg-options "-std=c11 -pedantic-errors" } */ -+ -+#define FOO :: -+#define BAR : -+#define JOIN(A, B) A/**/B -+#define JOIN2(A, B) A##B -+ -+typedef int [[__extension__ gnu::vector_size (4)]] g1; -+typedef int [[__extension__ gnu :: vector_size (4)]] g2; -+typedef int [[__extension__ gnu : : vector_size (4)]] g3; -+typedef int [[__extension__ gnu: :vector_size (4)]] g4; -+typedef int [[__extension__ gnu FOO vector_size (4)]] g5; -+typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6; -+typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7; -+typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8; -+typedef int [[__extension__ gnu :: vector_size (sizeof (void (*)(...)))]] g10; -+typedef int [[__extension__]] g11; -+typedef int [[__extension__,]] g12; -+typedef int [[__extension__, ,,,, ,, ,]] g13; -+[[__extension__ deprecated]] int g14 (); -+[[__extension__ nodiscard]] int g15 (); -+[[__extension__ noreturn]] void g16 (); -+ -+int -+cases (int x) -+{ -+ switch (x) -+ { -+ case 1: -+ case 2: -+ case 4: -+ x += 1; -+ [[__extension__ fallthrough]]; -+ case 19: -+ case 33: -+ x *= 2; -+ [[fallthrough]]; /* { dg-error {attributes before C2X} } */ -+ case 99: -+ return x; -+ -+ default: -+ return 0; -+ } -+} -+ -+typedef int [[__extension__ vector_size (4)]] b1; /* { dg-error {'vector_size' attribute ignored} } */ -+typedef int [[__extension__ __extension__]] b2; /* { dg-error {'extension' attribute ignored} } */ -+typedef int [[__extension__ unknown_attribute]] b3; /* { dg-error {'unknown_attribute' attribute ignored} } */ -+typedef int [[__extension__ gnu:vector_size(4)]] b4; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+typedef int [[__extension__ gnu JOIN2(:,:) vector_size (4)]] b5; /* { dg-error {pasting ":" and ":" does not give a valid preprocessing token} } */ -+typedef int [[gnu::vector_size(4)]] b6; /* { dg-error {expected '\]' before 
':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */ -+typedef int [[gnu : : vector_size(4)]] b7; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */ -+typedef int [[gnu : vector_size(4)]] b8; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+/* { dg-error {attributes before C2X} "" { target *-*-* } .-2 } */ -diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c -new file mode 100644 -index 000000000..702f733b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c -@@ -0,0 +1,60 @@ -+/* Test C2x attribute syntax: use of __extension__ in C11 mode. */ -+/* { dg-do compile } */ -+/* { dg-options "-std=c2x -pedantic-errors -Wc11-c2x-compat" } */ -+ -+#define FOO :: -+#define BAR : -+#define JOIN(A, B) A/**/B -+#define JOIN2(A, B) A##B -+ -+typedef int [[__extension__ gnu::vector_size (4)]] g1; -+typedef int [[__extension__ gnu :: vector_size (4)]] g2; -+typedef int [[__extension__ gnu : : vector_size (4)]] g3; -+typedef int [[__extension__ gnu: :vector_size (4)]] g4; -+typedef int [[__extension__ gnu FOO vector_size (4)]] g5; -+typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6; -+typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7; -+typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8; -+typedef int [[__extension__ gnu :: vector_size (sizeof (void (*)(...)))]] g10; -+typedef int [[__extension__]] g11; -+typedef int [[__extension__,]] g12; -+typedef int [[__extension__, ,,,, ,, ,]] g13; -+[[__extension__ deprecated]] int g14 (); -+[[__extension__ nodiscard]] int g15 (); -+[[__extension__ noreturn]] void g16 (); -+ -+int -+cases (int x) -+{ -+ switch (x) -+ { -+ case 1: -+ case 2: -+ case 4: -+ x 
+= 1; -+ [[__extension__ fallthrough]]; -+ case 19: -+ case 33: -+ x *= 2; -+ [[fallthrough]]; /* { dg-warning {attributes before C2X} } */ -+ case 99: -+ return x; -+ -+ default: -+ return 0; -+ } -+} -+ -+typedef int [[__extension__ vector_size (4)]] b1; /* { dg-error {'vector_size' attribute ignored} } */ -+typedef int [[__extension__ __extension__]] b2; /* { dg-error {'extension' attribute ignored} } */ -+typedef int [[__extension__ unknown_attribute]] b3; /* { dg-error {'unknown_attribute' attribute ignored} } */ -+typedef int [[__extension__ gnu:vector_size(4)]] b4; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+typedef int [[__extension__ gnu JOIN2(:,:) vector_size (4)]] b5; -+typedef int [[gnu::vector_size(4)]] b6; /* { dg-warning {attributes before C2X} } */ -+typedef int [[gnu : : vector_size(4)]] b7; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+/* { dg-warning {attributes before C2X} "" { target *-*-* } .-2 } */ -+typedef int [[gnu : vector_size(4)]] b8; /* { dg-error {expected '\]' before ':'} } */ -+/* { dg-error {'gnu' attribute ignored} "" { target *-*-* } .-1 } */ -+/* { dg-warning {attributes before C2X} "" { target *-*-* } .-2 } */ --- -2.33.0 - diff --git a/0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch b/0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch deleted file mode 100644 index 95b50de..0000000 --- a/0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 29a71fc5cbfc3b5e4649abf51740daed5ea243bd Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 5 Dec 2023 09:20:55 +0000 -Subject: [PATCH 134/157] [Backport][SME] lra: Updates of biggest mode for hard - regs [PR112278] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=6e2e0ce6795c863e295eb33559f8dc0500297da3 - -LRA keeps track of the biggest mode for 
both hard registers and -pseudos. The updates assume that the modes are ordered, i.e. that -we can tell whether one is no bigger than the other at compile time. - -That is (or at least seemed to be) a reasonable restriction for pseudos. -But it isn't necessarily so for hard registers, since the uses of hard -registers can be logically distinct. The testcase is an example of this. - -The biggest mode of hard registers is also special for other reasons. -As the existing comment says: - - /* A reg can have a biggest_mode of VOIDmode if it was only ever seen as - part of a multi-word register. In that case, just use the reg_rtx - mode. Do the same also if the biggest mode was larger than a register - or we can not compare the modes. Otherwise, limit the size to that of - the biggest access in the function or to the natural mode at least. */ - -This patch applies the same approach to the updates. - -gcc/ - PR rtl-optimization/112278 - * lra-int.h (lra_update_biggest_mode): New function. - * lra-coalesce.cc (merge_pseudos): Use it. - * lra-lives.cc (process_bb_lives): Likewise. - * lra.cc (new_insn_reg): Likewise. - -gcc/testsuite/ - PR rtl-optimization/112278 - * gcc.target/aarch64/sve/pr112278.c: New test. 
---- - gcc/lra-coalesce.cc | 4 +--- - gcc/lra-int.h | 15 +++++++++++++++ - gcc/lra-lives.cc | 4 +--- - gcc/lra.cc | 5 ++--- - gcc/testsuite/gcc.target/aarch64/sve/pr112278.c | 15 +++++++++++++++ - 5 files changed, 34 insertions(+), 9 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr112278.c - -diff --git a/gcc/lra-coalesce.cc b/gcc/lra-coalesce.cc -index c82934569..901a44663 100644 ---- a/gcc/lra-coalesce.cc -+++ b/gcc/lra-coalesce.cc -@@ -112,9 +112,7 @@ merge_pseudos (int regno1, int regno2) - = (lra_merge_live_ranges - (lra_reg_info[first].live_ranges, - lra_copy_live_range_list (lra_reg_info[first2].live_ranges))); -- if (partial_subreg_p (lra_reg_info[first].biggest_mode, -- lra_reg_info[first2].biggest_mode)) -- lra_reg_info[first].biggest_mode = lra_reg_info[first2].biggest_mode; -+ lra_update_biggest_mode (first, lra_reg_info[first2].biggest_mode); - } - - /* Change pseudos in *LOC on their coalescing group -diff --git a/gcc/lra-int.h b/gcc/lra-int.h -index 04baefef3..040e87d11 100644 ---- a/gcc/lra-int.h -+++ b/gcc/lra-int.h -@@ -525,4 +525,19 @@ lra_assign_reg_val (int from, int to) - lra_reg_info[to].offset = lra_reg_info[from].offset; - } - -+/* Update REGNO's biggest recorded mode so that it includes a reference -+ in mode MODE. 
*/ -+inline void -+lra_update_biggest_mode (int regno, machine_mode mode) -+{ -+ if (!ordered_p (GET_MODE_SIZE (lra_reg_info[regno].biggest_mode), -+ GET_MODE_SIZE (mode))) -+ { -+ gcc_checking_assert (HARD_REGISTER_NUM_P (regno)); -+ lra_reg_info[regno].biggest_mode = reg_raw_mode[regno]; -+ } -+ else if (partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) -+ lra_reg_info[regno].biggest_mode = mode; -+} -+ - #endif /* GCC_LRA_INT_H */ -diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc -index a755464ee..fb4a12304 100644 ---- a/gcc/lra-lives.cc -+++ b/gcc/lra-lives.cc -@@ -770,9 +770,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - { - int regno = reg->regno; - -- if (partial_subreg_p (lra_reg_info[regno].biggest_mode, -- reg->biggest_mode)) -- lra_reg_info[regno].biggest_mode = reg->biggest_mode; -+ lra_update_biggest_mode (regno, reg->biggest_mode); - if (HARD_REGISTER_NUM_P (regno)) - lra_hard_reg_usage[regno] += freq; - } -diff --git a/gcc/lra.cc b/gcc/lra.cc -index 1444cb759..8fda432f1 100644 ---- a/gcc/lra.cc -+++ b/gcc/lra.cc -@@ -559,9 +559,8 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type, - lra_insn_reg *ir = lra_insn_reg_pool.allocate (); - ir->type = type; - ir->biggest_mode = mode; -- if (NONDEBUG_INSN_P (insn) -- && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) -- lra_reg_info[regno].biggest_mode = mode; -+ if (NONDEBUG_INSN_P (insn)) -+ lra_update_biggest_mode (regno, mode); - ir->subreg_p = subreg_p; - ir->early_clobber_alts = early_clobber_alts; - ir->regno = regno; -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c -new file mode 100644 -index 000000000..4f56add2b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr112278.c -@@ -0,0 +1,15 @@ -+#include -+#include -+ -+void -+f (void) -+{ -+ { -+ register svint8_t v0 asm ("z0"); -+ asm volatile ("" : "=w" (v0)); -+ } -+ { -+ register int8x8x4_t v0 asm ("v0"); -+ asm 
volatile ("" : "=w" (v0)); -+ } -+} --- -2.33.0 - diff --git a/0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch b/0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch deleted file mode 100644 index f4b1efd..0000000 --- a/0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch +++ /dev/null @@ -1,672 +0,0 @@ -From 0a34bb6b18cdf34cb9d4f34b1697e1bcfcff139b Mon Sep 17 00:00:00 2001 -From: Joseph Myers -Date: Thu, 25 Aug 2022 21:02:57 +0000 -Subject: [PATCH 135/157] [Backport][SME] c: Support C2x empty initializer - braces - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=14cfa01755a66afbae2539f8b5796c960ddcecc6 - -ISO C2x standardizes empty initializer braces {}. Implement this -feature accordingly. The basic case was already supported and so just -needed diagnostic adjustments. However, the standard feature also -includes two cases that were not previously supported: empty -initializer braces for scalars, and empty initializer braces for -VLAs. Thus, add support for those features as well, updating existing -tests that expected them to be diagnosed. - -There was already some gimplifier support for converting -variable-sized initializations with empty CONSTRUCTORs to memset. -However, it didn't apply here; code earlier in gimplify_modify_expr -ended up calling gimplify_init_constructor via -gimplify_modify_expr_rhs, which ended up handling the CONSTRUCTOR in a -way that generated an ICE later. Add a check for this case earlier in -gimplify_modify_expr to avoid that issue. - -Bootstrapped with no regressions for x86_64-pc-linux-gnu. - -gcc/ - * gimplify.cc (gimplify_modify_expr): Convert initialization from - a variable-size CONSTRUCTOR to memset before call to - gimplify_modify_expr_rhs. - -gcc/c/ - * c-decl.cc (start_decl): Do not diagnose initialization of - variable-sized objects here. - * c-parser.cc (c_parser_braced_init): Add argument DECL. All - callers changed. 
- (c_parser_initializer): Diagnose initialization of variable-sized - objects other than with braced initializer. - (c_parser_braced_init): Use pedwarn_c11 for empty initializer - braces and update diagnostic text. Diagnose initialization of - variable-sized objects with nonempty braces. - * c-typeck.cc (digest_init): Update diagnostic for initialization - of variable-sized objects. - (really_start_incremental_init, set_designator) - (process_init_element): Update comments. - (pop_init_level): Allow scalar empty initializers. - -gcc/testsuite/ - * gcc.dg/c11-empty-init-1.c, gcc.dg/c11-empty-init-2.c, - gcc.dg/c11-empty-init-3.c, gcc.dg/c2x-empty-init-1.c, - gcc.dg/c2x-empty-init-2.c, gcc.dg/c2x-empty-init-3.c, - gcc.dg/gnu2x-empty-init-1.c, gcc.dg/gnu2x-empty-init-2.c: New - tests. - * gcc.dg/torture/dfp-default-init-1.c: Also test empty - initializers. - * gcc.dg/init-bad-1.c, gcc.dg/noncompile/pr71583.c, - gcc.dg/pr61096-1.c, gcc.dg/vla-init-2.c, gcc.dg/vla-init-3.c, - gcc.target/i386/sse2-bfloat16-scalar-typecheck.c: Update expected - diagnostics. - * gcc.dg/ubsan/c-shift-1.c: Use nonempty initializers for VLA - initializations expected to be diagnosed. 
---- - gcc/c/c-decl.cc | 20 +----- - gcc/c/c-parser.cc | 24 +++++-- - gcc/c/c-typeck.cc | 23 ++++--- - gcc/gimplify.cc | 15 +++++ - gcc/testsuite/gcc.dg/c11-empty-init-1.c | 25 +++++++ - gcc/testsuite/gcc.dg/c11-empty-init-2.c | 25 +++++++ - gcc/testsuite/gcc.dg/c11-empty-init-3.c | 25 +++++++ - gcc/testsuite/gcc.dg/c2x-empty-init-1.c | 80 +++++++++++++++++++++++ - gcc/testsuite/gcc.dg/c2x-empty-init-2.c | 18 +++++ - gcc/testsuite/gcc.dg/c2x-empty-init-3.c | 25 +++++++ - gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c | 29 ++++++++ - gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c | 16 +++++ - gcc/testsuite/gcc.dg/init-bad-1.c | 3 +- - gcc/testsuite/gcc.dg/noncompile/pr71583.c | 2 +- - gcc/testsuite/gcc.dg/pr61096-1.c | 2 +- - gcc/testsuite/gcc.dg/ubsan/c-shift-1.c | 12 ++-- - gcc/testsuite/gcc.dg/vla-init-2.c | 1 - - gcc/testsuite/gcc.dg/vla-init-3.c | 1 - - 18 files changed, 301 insertions(+), 45 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-1.c - create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-2.c - create mode 100644 gcc/testsuite/gcc.dg/c11-empty-init-3.c - create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-1.c - create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-2.c - create mode 100644 gcc/testsuite/gcc.dg/c2x-empty-init-3.c - create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c - create mode 100644 gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c - -diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc -index 9d87a8cdb..685bb1757 100644 ---- a/gcc/c/c-decl.cc -+++ b/gcc/c/c-decl.cc -@@ -5166,29 +5166,15 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, - initialized = false; - else if (COMPLETE_TYPE_P (TREE_TYPE (decl))) - { -- /* A complete type is ok if size is fixed. */ -- -- if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) -- || C_DECL_VARIABLE_SIZE (decl)) -- { -- error ("variable-sized object may not be initialized"); -- initialized = false; -- } -+ /* A complete type is ok if size is fixed. 
If the size is -+ variable, an empty initializer is OK and nonempty -+ initializers will be diagnosed in the parser. */ - } - else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) - { - error ("variable %qD has initializer but incomplete type", decl); - initialized = false; - } -- else if (C_DECL_VARIABLE_SIZE (decl)) -- { -- /* Although C99 is unclear about whether incomplete arrays -- of VLAs themselves count as VLAs, it does not make -- sense to permit them to be initialized given that -- ordinary VLAs may not be initialized. */ -- error ("variable-sized object may not be initialized"); -- initialized = false; -- } - } - - if (initialized) -diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc -index 486f46e1c..6db535d11 100644 ---- a/gcc/c/c-parser.cc -+++ b/gcc/c/c-parser.cc -@@ -1515,7 +1515,7 @@ static tree c_parser_simple_asm_expr (c_parser *); - static tree c_parser_gnu_attributes (c_parser *); - static struct c_expr c_parser_initializer (c_parser *, tree); - static struct c_expr c_parser_braced_init (c_parser *, tree, bool, -- struct obstack *); -+ struct obstack *, tree); - static void c_parser_initelt (c_parser *, struct obstack *); - static void c_parser_initval (c_parser *, struct c_expr *, - struct obstack *); -@@ -5247,11 +5247,15 @@ static struct c_expr - c_parser_initializer (c_parser *parser, tree decl) - { - if (c_parser_next_token_is (parser, CPP_OPEN_BRACE)) -- return c_parser_braced_init (parser, NULL_TREE, false, NULL); -+ return c_parser_braced_init (parser, NULL_TREE, false, NULL, decl); - else - { - struct c_expr ret; - location_t loc = c_parser_peek_token (parser)->location; -+ if (decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) -+ error_at (loc, -+ "variable-sized object may not be initialized except " -+ "with an empty initializer"); - ret = c_parser_expr_no_commas (parser, NULL); - /* This is handled mostly by gimplify.cc, but we have to deal with - not warning about int x = x; as it is a GCC extension to turn off -@@ -5278,11 
+5282,12 @@ location_t last_init_list_comma; - compound literal, and NULL_TREE for other initializers and for - nested braced lists. NESTED_P is true for nested braced lists, - false for the list of a compound literal or the list that is the -- top-level initializer in a declaration. */ -+ top-level initializer in a declaration. DECL is the declaration for -+ the top-level initializer for a declaration, otherwise NULL_TREE. */ - - static struct c_expr - c_parser_braced_init (c_parser *parser, tree type, bool nested_p, -- struct obstack *outer_obstack) -+ struct obstack *outer_obstack, tree decl) - { - struct c_expr ret; - struct obstack braced_init_obstack; -@@ -5300,10 +5305,15 @@ c_parser_braced_init (c_parser *parser, tree type, bool nested_p, - really_start_incremental_init (type); - if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE)) - { -- pedwarn (brace_loc, OPT_Wpedantic, "ISO C forbids empty initializer braces"); -+ pedwarn_c11 (brace_loc, OPT_Wpedantic, -+ "ISO C forbids empty initializer braces before C2X"); - } - else - { -+ if (decl && decl != error_mark_node && C_DECL_VARIABLE_SIZE (decl)) -+ error_at (brace_loc, -+ "variable-sized object may not be initialized except " -+ "with an empty initializer"); - /* Parse a non-empty initializer list, possibly with a trailing - comma. 
*/ - while (true) -@@ -5559,7 +5569,7 @@ c_parser_initval (c_parser *parser, struct c_expr *after, - - if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) - init = c_parser_braced_init (parser, NULL_TREE, true, -- braced_init_obstack); -+ braced_init_obstack, NULL_TREE); - else - { - init = c_parser_expr_no_commas (parser, after); -@@ -10312,7 +10322,7 @@ c_parser_postfix_expression_after_paren_type (c_parser *parser, - error_at (type_loc, "compound literal has variable size"); - type = error_mark_node; - } -- init = c_parser_braced_init (parser, type, false, NULL); -+ init = c_parser_braced_init (parser, type, false, NULL, NULL_TREE); - finish_init (); - maybe_warn_string_init (type_loc, type, init); - -diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc -index 603b03fe1..0889dd4cb 100644 ---- a/gcc/c/c-typeck.cc -+++ b/gcc/c/c-typeck.cc -@@ -8267,7 +8267,9 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, - - if (COMPLETE_TYPE_P (type) && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) - { -- error_init (init_loc, "variable-sized object may not be initialized"); -+ error_init (init_loc, -+ "variable-sized object may not be initialized except " -+ "with an empty initializer"); - return error_mark_node; - } - -@@ -8617,8 +8619,9 @@ really_start_incremental_init (tree type) - constructor_max_index = integer_minus_one_node; - - /* constructor_max_index needs to be an INTEGER_CST. Attempts -- to initialize VLAs will cause a proper error; avoid tree -- checking errors as well by setting a safe value. */ -+ to initialize VLAs with a nonempty initializer will cause a -+ proper error; avoid tree checking errors as well by setting a -+ safe value. 
*/ - if (constructor_max_index - && TREE_CODE (constructor_max_index) != INTEGER_CST) - constructor_max_index = integer_minus_one_node; -@@ -9000,12 +9003,14 @@ pop_init_level (location_t loc, int implicit, - && !gnu_vector_type_p (constructor_type)) - { - /* A nonincremental scalar initializer--just return -- the element, after verifying there is just one. */ -+ the element, after verifying there is just one. -+ Empty scalar initializers are supported in C2X. */ - if (vec_safe_is_empty (constructor_elements)) - { -- if (!constructor_erroneous && constructor_type != error_mark_node) -- error_init (loc, "empty scalar initializer"); -- ret.value = error_mark_node; -+ if (constructor_erroneous || constructor_type == error_mark_node) -+ ret.value = error_mark_node; -+ else -+ ret.value = build_zero_cst (constructor_type); - } - else if (vec_safe_length (constructor_elements) != 1) - { -@@ -9090,7 +9095,7 @@ set_designator (location_t loc, bool array, - return true; - - /* Likewise for an initializer for a variable-size type. Those are -- diagnosed in digest_init. */ -+ diagnosed in the parser, except for empty initializer braces. */ - if (COMPLETE_TYPE_P (constructor_type) - && TREE_CODE (TYPE_SIZE (constructor_type)) != INTEGER_CST) - return true; -@@ -10251,7 +10256,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - return; - - /* Ignore elements of an initializer for a variable-size type. -- Those are diagnosed in digest_init. */ -+ Those are diagnosed in the parser (empty initializer braces are OK). 
*/ - if (COMPLETE_TYPE_P (constructor_type) - && !poly_int_tree_p (TYPE_SIZE (constructor_type))) - return; -diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc -index a551c574a..91500e2fb 100644 ---- a/gcc/gimplify.cc -+++ b/gcc/gimplify.cc -@@ -6026,6 +6026,21 @@ gimplify_modify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, - return GS_ALL_DONE; - } - -+ /* Convert initialization from an empty variable-size CONSTRUCTOR to -+ memset. */ -+ if (TREE_TYPE (*from_p) != error_mark_node -+ && TYPE_SIZE_UNIT (TREE_TYPE (*from_p)) -+ && !poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (*from_p))) -+ && TREE_CODE (*from_p) == CONSTRUCTOR -+ && CONSTRUCTOR_NELTS (*from_p) == 0) -+ { -+ maybe_with_size_expr (from_p); -+ gcc_assert (TREE_CODE (*from_p) == WITH_SIZE_EXPR); -+ return gimplify_modify_expr_to_memset (expr_p, -+ TREE_OPERAND (*from_p, 1), -+ want_value, pre_p); -+ } -+ - /* Insert pointer conversions required by the middle-end that are not - required by the frontend. This fixes middle-end type checking for - for example gcc.dg/redecl-6.c. */ -diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-1.c b/gcc/testsuite/gcc.dg/c11-empty-init-1.c -new file mode 100644 -index 000000000..120c28225 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c11-empty-init-1.c -@@ -0,0 +1,25 @@ -+/* Test C11 does not support empty initializers. 
*/ -+/* { dg-do compile } */ -+/* { dg-options "-std=c11 -pedantic-errors" } */ -+ -+struct s { int a; }; -+struct s s = {}; /* { dg-error "empty initializer" } */ -+int x = {}; /* { dg-error "empty initializer" } */ -+float y = {}; /* { dg-error "empty initializer" } */ -+void *p = {}; /* { dg-error "empty initializer" } */ -+union u { int a; long b; }; -+union u z = {}; /* { dg-error "empty initializer" } */ -+int aa[2] = {}; /* { dg-error "empty initializer" } */ -+ -+void -+f (int a) -+{ -+ int vla[a] = {}; /* { dg-error "empty initializer" } */ -+ struct s as = {}; /* { dg-error "empty initializer" } */ -+ int ax = {}; /* { dg-error "empty initializer" } */ -+ float ay = {}; /* { dg-error "empty initializer" } */ -+ void *ap = {}; /* { dg-error "empty initializer" } */ -+ union u az = {}; /* { dg-error "empty initializer" } */ -+ int aaa[2] = {}; /* { dg-error "empty initializer" } */ -+ int t = (int) {}; /* { dg-error "empty initializer" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-2.c b/gcc/testsuite/gcc.dg/c11-empty-init-2.c -new file mode 100644 -index 000000000..3ec7c512a ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c11-empty-init-2.c -@@ -0,0 +1,25 @@ -+/* Test C11 does not support empty initializers. 
*/ -+/* { dg-do compile } */ -+/* { dg-options "-std=c11 -pedantic" } */ -+ -+struct s { int a; }; -+struct s s = {}; /* { dg-warning "empty initializer" } */ -+int x = {}; /* { dg-warning "empty initializer" } */ -+float y = {}; /* { dg-warning "empty initializer" } */ -+void *p = {}; /* { dg-warning "empty initializer" } */ -+union u { int a; long b; }; -+union u z = {}; /* { dg-warning "empty initializer" } */ -+int aa[2] = {}; /* { dg-warning "empty initializer" } */ -+ -+void -+f (int a) -+{ -+ int vla[a] = {}; /* { dg-warning "empty initializer" } */ -+ struct s as = {}; /* { dg-warning "empty initializer" } */ -+ int ax = {}; /* { dg-warning "empty initializer" } */ -+ float ay = {}; /* { dg-warning "empty initializer" } */ -+ void *ap = {}; /* { dg-warning "empty initializer" } */ -+ union u az = {}; /* { dg-warning "empty initializer" } */ -+ int aaa[2] = {}; /* { dg-warning "empty initializer" } */ -+ int t = (int) {}; /* { dg-warning "empty initializer" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/c11-empty-init-3.c b/gcc/testsuite/gcc.dg/c11-empty-init-3.c -new file mode 100644 -index 000000000..fd43fa789 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c11-empty-init-3.c -@@ -0,0 +1,25 @@ -+/* Test C11 does not support empty initializers. 
*/ -+/* { dg-do compile } */ -+/* { dg-options "-std=c11 -Wc11-c2x-compat" } */ -+ -+struct s { int a; }; -+struct s s = {}; /* { dg-warning "empty initializer" } */ -+int x = {}; /* { dg-warning "empty initializer" } */ -+float y = {}; /* { dg-warning "empty initializer" } */ -+void *p = {}; /* { dg-warning "empty initializer" } */ -+union u { int a; long b; }; -+union u z = {}; /* { dg-warning "empty initializer" } */ -+int aa[2] = {}; /* { dg-warning "empty initializer" } */ -+ -+void -+f (int a) -+{ -+ int vla[a] = {}; /* { dg-warning "empty initializer" } */ -+ struct s as = {}; /* { dg-warning "empty initializer" } */ -+ int ax = {}; /* { dg-warning "empty initializer" } */ -+ float ay = {}; /* { dg-warning "empty initializer" } */ -+ void *ap = {}; /* { dg-warning "empty initializer" } */ -+ union u az = {}; /* { dg-warning "empty initializer" } */ -+ int aaa[2] = {}; /* { dg-warning "empty initializer" } */ -+ int t = (int) {}; /* { dg-warning "empty initializer" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-1.c b/gcc/testsuite/gcc.dg/c2x-empty-init-1.c -new file mode 100644 -index 000000000..1487a2b23 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c2x-empty-init-1.c -@@ -0,0 +1,80 @@ -+/* Test C2X support for empty initializers: valid use cases. 
*/ -+/* { dg-do run } */ -+/* { dg-options "-std=c2x -pedantic-errors" } */ -+ -+extern void exit (int); -+extern void abort (void); -+ -+struct s { int a; }; -+struct s s = {}; -+int x = {}; -+float y = {}; -+void *p = {}; -+union u { int a; long b; }; -+union u z = {}; -+int aa[2] = {}; -+ -+void -+f (int a) -+{ -+ volatile int vla[a] = {}; -+ struct s as = {}; -+ int ax = {}; -+ float ay = {}; -+ void *ap = {}; -+ union u az = {}; -+ int aaa[2] = {}; -+ for (int i = 0; i < a; i++) -+ if (vla[i] != 0) -+ abort (); -+ if (as.a != 0) -+ abort (); -+ if (ax != 0) -+ abort (); -+ if (ay != 0) -+ abort (); -+ if (ap != 0) -+ abort (); -+ if (az.a != 0) -+ abort (); -+ if (aaa[0] != 0) -+ abort (); -+ if (aaa[1] != 0) -+ abort (); -+ if ((int) {} != 0) -+ abort (); -+ if ((float) {} != 0) -+ abort (); -+ if ((struct s) {}.a != 0) -+ abort (); -+ if ((union u) {}.a != 0) -+ abort (); -+ if ((int [5]) {}[2] != 0) -+ abort (); -+ /* Overwrite contents of vla before second call to make it more likely stack -+ contents are nonzero if proper initialization did not occur. */ -+ for (int i = 0; i < a; i++) -+ vla[i] = -1; -+} -+ -+int -+main (void) -+{ -+ f (100); -+ f (100); -+ if (s.a != 0) -+ abort (); -+ if (x != 0) -+ abort (); -+ if (y != 0) -+ abort (); -+ if (p != 0) -+ abort (); -+ if (z.a != 0) -+ abort (); -+ if (aa[0] != 0) -+ abort (); -+ if (aa[1] != 0) -+ abort (); -+ exit (0); -+} -diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-2.c b/gcc/testsuite/gcc.dg/c2x-empty-init-2.c -new file mode 100644 -index 000000000..0dc81ce5b ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c2x-empty-init-2.c -@@ -0,0 +1,18 @@ -+/* Test C2X support for empty initializers: invalid use cases. */ -+/* { dg-do compile } */ -+/* { dg-options "-std=c2x -pedantic-errors" } */ -+ -+/* Empty initialization is invalid for arrays of unknown size. This is -+ diagnosed via the diagnostic for zero-size arrays. 
*/ -+int x[] = {}; /* { dg-error "zero or negative size array" } */ -+ -+void -+f (int a) -+{ -+ int x1[] = {}; /* { dg-error "zero or negative size array" } */ -+ int x2[][a] = {}; /* { dg-error "zero or negative size array" } */ -+ /* Nonempty VLA initializers are still invalid. */ -+ int x3[a] = { 0 }; /* { dg-error "variable-sized object may not be initialized except with an empty initializer" } */ -+ /* Variable-size compound literals are still invalid. */ -+ (void) (int [a]) {}; /* { dg-error "compound literal has variable size" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/c2x-empty-init-3.c b/gcc/testsuite/gcc.dg/c2x-empty-init-3.c -new file mode 100644 -index 000000000..472f8169c ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/c2x-empty-init-3.c -@@ -0,0 +1,25 @@ -+/* Test empty initializers diagnosed in C2X mode with -Wc11-c2x-compat. */ -+/* { dg-do compile } */ -+/* { dg-options "-std=c2x -Wc11-c2x-compat" } */ -+ -+struct s { int a; }; -+struct s s = {}; /* { dg-warning "empty initializer" } */ -+int x = {}; /* { dg-warning "empty initializer" } */ -+float y = {}; /* { dg-warning "empty initializer" } */ -+void *p = {}; /* { dg-warning "empty initializer" } */ -+union u { int a; long b; }; -+union u z = {}; /* { dg-warning "empty initializer" } */ -+int aa[2] = {}; /* { dg-warning "empty initializer" } */ -+ -+void -+f (int a) -+{ -+ int vla[a] = {}; /* { dg-warning "empty initializer" } */ -+ struct s as = {}; /* { dg-warning "empty initializer" } */ -+ int ax = {}; /* { dg-warning "empty initializer" } */ -+ float ay = {}; /* { dg-warning "empty initializer" } */ -+ void *ap = {}; /* { dg-warning "empty initializer" } */ -+ union u az = {}; /* { dg-warning "empty initializer" } */ -+ int aaa[2] = {}; /* { dg-warning "empty initializer" } */ -+ int t = (int) {}; /* { dg-warning "empty initializer" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c b/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c -new file mode 100644 -index 000000000..e7dc9dfde ---- 
/dev/null -+++ b/gcc/testsuite/gcc.dg/gnu2x-empty-init-1.c -@@ -0,0 +1,29 @@ -+/* Test C2X support for empty initializers: valid use cases with GNU -+ extensions. */ -+/* { dg-do run } */ -+/* { dg-options "-std=gnu2x" } */ -+ -+extern void exit (int); -+extern void abort (void); -+ -+void -+f (int a) -+{ -+ struct s { volatile int x[a]; }; -+ struct s b = {}; -+ for (int i = 0; i < a; i++) -+ if (b.x[i] != 0) -+ abort (); -+ /* Overwrite contents of b.x before second call to make it more likely stack -+ contents are nonzero if proper initialization did not occur. */ -+ for (int i = 0; i < a; i++) -+ b.x[i] = -1; -+} -+ -+int -+main (void) -+{ -+ f (100); -+ f (100); -+ exit (0); -+} -diff --git a/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c b/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c -new file mode 100644 -index 000000000..69ee4e36b ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/gnu2x-empty-init-2.c -@@ -0,0 +1,16 @@ -+/* Test C2X support for empty initializers: invalid use cases with GNU -+ extensions. */ -+/* { dg-do compile } */ -+/* { dg-options "-std=gnu2x" } */ -+ -+void -+f (int a) -+{ -+ /* Make sure a non-braced initializer for a VLA-in-struct is still not -+ allowed. 
*/ -+ struct s { int x[a]; }; -+ struct s b; -+ for (int i = 0; i < a; i++) -+ b.x[i] = 0; -+ struct s c = b; /* { dg-error "variable-sized object may not be initialized except with an empty initializer" } */ -+} -diff --git a/gcc/testsuite/gcc.dg/init-bad-1.c b/gcc/testsuite/gcc.dg/init-bad-1.c -index 61734045f..0da10c315 100644 ---- a/gcc/testsuite/gcc.dg/init-bad-1.c -+++ b/gcc/testsuite/gcc.dg/init-bad-1.c -@@ -21,8 +21,7 @@ char t1[1] = { "xy" }; /* { dg-warning "initializer-string for array of 'char' i - char u[1] = { "x", "x" }; /* { dg-error "excess elements in 'char' array initializer" } */ - /* { dg-message "near init" "near" { target *-*-* } .-1 } */ - --int i = { }; /* { dg-error "empty scalar initializer" } */ --/* { dg-message "near init" "near" { target *-*-* } .-1 } */ -+int i = { }; - - int j = { 1 }; - -diff --git a/gcc/testsuite/gcc.dg/noncompile/pr71583.c b/gcc/testsuite/gcc.dg/noncompile/pr71583.c -index 5045b88b6..fe6e556ad 100644 ---- a/gcc/testsuite/gcc.dg/noncompile/pr71583.c -+++ b/gcc/testsuite/gcc.dg/noncompile/pr71583.c -@@ -5,7 +5,7 @@ void - f (int i) - { - (int (*)[++i]) { int }; /* { dg-error "expected" } */ -- (int (*)[++i]) { }; /* { dg-error "empty" } */ -+ (int (*)[++i]) { }; - (int (*)[++i]) { , }; /* { dg-error "expected" } */ - (int (*)[++i]) { f () }; /* { dg-error "too few" } */ - } -diff --git a/gcc/testsuite/gcc.dg/pr61096-1.c b/gcc/testsuite/gcc.dg/pr61096-1.c -index e707904c0..f41789c5f 100644 ---- a/gcc/testsuite/gcc.dg/pr61096-1.c -+++ b/gcc/testsuite/gcc.dg/pr61096-1.c -@@ -36,7 +36,7 @@ struct S s = { { 1 }, { 3 } }; /* { dg-error "23:extra brace group at end of ini - struct g g1 = { {0, { 1 } } }; /* { dg-error "21:initialization of flexible array member in a nested context" } */ - struct g g2 = { .f[0] = 1 }; /* { dg-error "20:array index in non-array initializer" } */ - --__extension__ int a8 = { }; /* { dg-error "24:empty scalar initializer" } */ -+__extension__ int a8 = { }; - int a9[10] = {[1.2] = 2 }; /* { 
dg-error "16:array index in initializer not of integer type" } */ - int a10[10] = {[e] = 2 }; /* { dg-error "17:nonconstant array index in initializer" } */ - __extension__ int a11[10] = {[1 ... e] = 1 }; /* { dg-error "31:nonconstant array index in initializer" } */ -diff --git a/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c b/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c -index 9d561016f..f88ee2de3 100644 ---- a/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c -+++ b/gcc/testsuite/gcc.dg/ubsan/c-shift-1.c -@@ -7,12 +7,12 @@ int - main (void) - { - /* None of the following should pass. */ -- int A[1 >> -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -- int B[-1 >> -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -- int D[1 << -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -- int E[-1 << -1] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -- int F[-1 >> 200] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -- int G[1 << 200] = {}; /* { dg-error "variable-sized object may not be initialized" } */ -+ int A[1 >> -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ -+ int B[-1 >> -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ -+ int D[1 << -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ -+ int E[-1 << -1] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ -+ int F[-1 >> 200] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ -+ int G[1 << 200] = { 0 }; /* { dg-error "variable-sized object may not be initialized" } */ - - return 0; - } -diff --git a/gcc/testsuite/gcc.dg/vla-init-2.c b/gcc/testsuite/gcc.dg/vla-init-2.c -index 19fbffc26..f23630a36 100644 ---- a/gcc/testsuite/gcc.dg/vla-init-2.c -+++ b/gcc/testsuite/gcc.dg/vla-init-2.c -@@ -7,4 +7,3 @@ - - const int i = 1; - void foo() { char a[][i] = {""}; } /* { dg-error 
"variable-sized object may not be initialized" } */ --/* { dg-error "array size missing in 'a'" "extra error" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/gcc.dg/vla-init-3.c b/gcc/testsuite/gcc.dg/vla-init-3.c -index 55e1de69c..a854f1268 100644 ---- a/gcc/testsuite/gcc.dg/vla-init-3.c -+++ b/gcc/testsuite/gcc.dg/vla-init-3.c -@@ -6,4 +6,3 @@ - /* { dg-options "" } */ - - void foo(int i) { char a[][i] = {""}; } /* { dg-error "variable-sized object may not be initialized" } */ --/* { dg-error "array size missing in 'a'" "extra error" { target *-*-* } .-1 } */ --- -2.33.0 - diff --git a/0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch b/0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch deleted file mode 100644 index 04a80d1..0000000 --- a/0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch +++ /dev/null @@ -1,115 +0,0 @@ -From 67001778883e10110c505dd8876a447a19d1ac5e Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Wed, 31 Aug 2022 15:39:27 +0100 -Subject: [PATCH 136/157] [Backport][SME] aarch64: Update sizeless tests for - recent GNU C changes - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de9805c08121a84ce368dccfe043a3f44c3ff13b - -The tests for sizeless SVE types include checks that the types -are handled for initialisation purposes in the same way as scalars. -GNU C and C2x now allow scalars to be initialised using empty braces, -so this patch updates the SVE tests to match. - -gcc/testsuite/ - * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c: Update - tests for empty initializers. - * gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/sizeless-1.c: Likewise. - * gcc.target/aarch64/sve/acle/general-c/sizeless-2.c: Likewise. 
---- - .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c | 4 ++-- - .../gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c | 4 ++-- - .../gcc.target/aarch64/sve/acle/general-c/sizeless-1.c | 4 ++-- - .../gcc.target/aarch64/sve/acle/general-c/sizeless-2.c | 4 ++-- - 4 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c -index 285751eeb..9db953583 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c -@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, - /* Initialization. */ - - svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ -+ svuint8_t init_sve_u2 = {}; - svuint8_t init_sve_u3 = { sve_u1 }; - svuint8_t init_sve_u4 = { gnu_u1 }; - svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ -@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, - - /* Compound literals. */ - -- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svuint8_t) {}; - (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ - (svuint8_t) { sve_u1 }; - (svuint8_t) { gnu_u1 }; -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c -index 306fd4780..c05b16406 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c -@@ -12,7 +12,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, - /* Initialization. 
*/ - - svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -- svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ -+ svuint8_t init_sve_u2 = {}; - svuint8_t init_sve_u3 = { sve_u1 }; - svuint8_t init_sve_u4 = { gnu_u1 }; - svuint8_t init_sve_u5 = { sve_s1 }; -@@ -31,7 +31,7 @@ f (svuint8_t sve_u1, svint8_t sve_s1, - - /* Compound literals. */ - -- (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svuint8_t) {}; - (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ - (svuint8_t) { sve_u1 }; - (svuint8_t) { gnu_u1 }; -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c -index 7fc51e7ad..4b34a71c1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c -@@ -66,14 +66,14 @@ statements (int n) - - svint8_t init_sve_sc1 = sve_sc1; - svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ -- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ -+ svint8_t init_sve_sc3 = {}; - - int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ - int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ - - /* Compound literals. 
*/ - -- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svint8_t) {}; - (svint8_t) { sve_sc1 }; - - (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c -index c575492c1..34dfd598e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c -@@ -66,14 +66,14 @@ statements (int n) - - svint8_t init_sve_sc1 = sve_sc1; - svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ -- svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ -+ svint8_t init_sve_sc3 = {}; - - int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ - int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ - - /* Compound literals. 
*/ - -- (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svint8_t) {}; - (svint8_t) { sve_sc1 }; - - (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ --- -2.33.0 - diff --git a/0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch b/0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch deleted file mode 100644 index f7a909f..0000000 --- a/0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch +++ /dev/null @@ -1,58 +0,0 @@ -From dbe5a29054d4eb1e0f5173c8f2291569eac71c96 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Sat, 2 Dec 2023 13:49:55 +0000 -Subject: [PATCH 137/157] [Backport][SME] attribs: Namespace-aware - lookup_attribute_spec - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df4643f90c45db2501c731d4fded60dc1426b484 - -attribute_ignored_p already used a namespace-aware query -to find the attribute_spec for an existing attribute: - - const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); - -This patch does the same for other callers in the file. - -gcc/ - * attribs.cc (comp_type_attributes): Pass the full TREE_PURPOSE - to lookup_attribute_spec, rather than just the name. - (remove_attributes_matching): Likewise. 
---- - gcc/attribs.cc | 7 +++---- - 1 file changed, 3 insertions(+), 4 deletions(-) - -diff --git a/gcc/attribs.cc b/gcc/attribs.cc -index 8e2696bc5..1dbc30a95 100644 ---- a/gcc/attribs.cc -+++ b/gcc/attribs.cc -@@ -1417,7 +1417,7 @@ comp_type_attributes (const_tree type1, const_tree type2) - const struct attribute_spec *as; - const_tree attr; - -- as = lookup_attribute_spec (get_attribute_name (a)); -+ as = lookup_attribute_spec (TREE_PURPOSE (a)); - if (!as || as->affects_type_identity == false) - continue; - -@@ -1431,7 +1431,7 @@ comp_type_attributes (const_tree type1, const_tree type2) - { - const struct attribute_spec *as; - -- as = lookup_attribute_spec (get_attribute_name (a)); -+ as = lookup_attribute_spec (TREE_PURPOSE (a)); - if (!as || as->affects_type_identity == false) - continue; - -@@ -1473,8 +1473,7 @@ remove_attributes_matching (tree attrs, Predicate predicate) - const_tree start = attrs; - for (const_tree attr = attrs; attr; attr = TREE_CHAIN (attr)) - { -- tree name = get_attribute_name (attr); -- const attribute_spec *as = lookup_attribute_spec (name); -+ const attribute_spec *as = lookup_attribute_spec (TREE_PURPOSE (attr)); - const_tree end; - if (!predicate (attr, as)) - end = attr; --- -2.33.0 - diff --git a/0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch b/0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch deleted file mode 100644 index 3d8cde6..0000000 --- a/0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch +++ /dev/null @@ -1,281 +0,0 @@ -From 6f42edc5035b7f7e96730dca19757b148e1be70c Mon Sep 17 00:00:00 2001 -From: Marek Polacek -Date: Thu, 29 Sep 2022 17:49:32 -0400 -Subject: [PATCH 138/157] [Backport][SME] c-family: ICE with - [[gnu::nocf_check]] [PR106937] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67efffec943656a509e036cd3c785a5c3d6885e1 - -When getting the name of an attribute, we ought to use -get_attribute_name, which handles both [[]] and __attribute__(()) -forms. 
Failure to do so may result in an ICE, like here. - -pp_c_attributes_display wasn't able to print the [[]] form of -attributes, so this patch teaches it to. - -When printing a pointer to function with a standard attribute, the attribute -should be printed after the parameter-list. With this patch we print: - - aka 'void (*)(int) [[gnu::nocf_check]]' - -or, in C++ with noexcept: - - aka 'void (*)(int) noexcept [[gnu::nocf_check]]' - -pp_c_attributes has been unused since its introduction in r56273 so -this patch removes it. - - PR c++/106937 - -gcc/c-family/ChangeLog: - - * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU - attributes here. - (c_pretty_printer::direct_abstract_declarator): Print the standard [[]] - attributes here. - (pp_c_attributes): Remove. - (pp_c_attributes_display): Print the [[]] form if appropriate. Use - get_attribute_name. Don't print a trailing space when printing the - [[]] form. - * c-pretty-print.h (pp_c_attributes): Remove. - -gcc/cp/ChangeLog: - - * error.cc: Include "attribs.h". - (dump_type_prefix): Print only GNU attributes here. - (dump_type_suffix): Print standard attributes here. - -gcc/testsuite/ChangeLog: - - * c-c++-common/pointer-to-fn1.c: New test. 
---- - gcc/c-family/c-pretty-print.cc | 96 ++++++++++++--------- - gcc/c-family/c-pretty-print.h | 1 - - gcc/cp/error.cc | 16 +++- - gcc/testsuite/c-c++-common/pointer-to-fn1.c | 18 ++++ - 4 files changed, 86 insertions(+), 45 deletions(-) - create mode 100644 gcc/testsuite/c-c++-common/pointer-to-fn1.c - -diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc -index 71a0cb510..4d60627b3 100644 ---- a/gcc/c-family/c-pretty-print.cc -+++ b/gcc/c-family/c-pretty-print.cc -@@ -462,7 +462,12 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) - { - pp_c_whitespace (pp); - pp_c_left_paren (pp); -- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); -+ /* If we're dealing with the GNU form of attributes, print this: -+ void (__attribute__((noreturn)) *f) (); -+ If it is the standard [[]] attribute, we'll print the attribute -+ in c_pretty_printer::direct_abstract_declarator/FUNCTION_TYPE. */ -+ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (pointee))) -+ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); - } - else if (!c_dialect_cxx ()) - pp_c_whitespace (pp); -@@ -591,6 +596,13 @@ c_pretty_printer::direct_abstract_declarator (tree t) - case FUNCTION_TYPE: - pp_c_parameter_type_list (this, t); - direct_abstract_declarator (TREE_TYPE (t)); -+ /* If this is the standard [[]] attribute, print -+ void (*)() [[noreturn]]; */ -+ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) -+ { -+ pp_space (this); -+ pp_c_attributes_display (this, TYPE_ATTRIBUTES (t)); -+ } - break; - - case ARRAY_TYPE: -@@ -845,32 +857,7 @@ c_pretty_printer::declaration (tree t) - pp_c_init_declarator (this, t); - } - --/* Pretty-print ATTRIBUTES using GNU C extension syntax. 
*/ -- --void --pp_c_attributes (c_pretty_printer *pp, tree attributes) --{ -- if (attributes == NULL_TREE) -- return; -- -- pp_c_ws_string (pp, "__attribute__"); -- pp_c_left_paren (pp); -- pp_c_left_paren (pp); -- for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes)) -- { -- pp_tree_identifier (pp, TREE_PURPOSE (attributes)); -- if (TREE_VALUE (attributes)) -- pp_c_call_argument_list (pp, TREE_VALUE (attributes)); -- -- if (TREE_CHAIN (attributes)) -- pp_separate_with (pp, ','); -- } -- pp_c_right_paren (pp); -- pp_c_right_paren (pp); --} -- --/* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes -- marked to be displayed on disgnostic. */ -+/* Pretty-print ATTRIBUTES marked to be displayed on diagnostic. */ - - void - pp_c_attributes_display (c_pretty_printer *pp, tree a) -@@ -880,10 +867,12 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) - if (a == NULL_TREE) - return; - -+ const bool std_p = cxx11_attribute_p (a); -+ - for (; a != NULL_TREE; a = TREE_CHAIN (a)) - { -- const struct attribute_spec *as; -- as = lookup_attribute_spec (TREE_PURPOSE (a)); -+ const struct attribute_spec *as -+ = lookup_attribute_spec (get_attribute_name (a)); - if (!as || as->affects_type_identity == false) - continue; - if (c_dialect_cxx () -@@ -891,26 +880,47 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) - /* In C++ transaction_safe is printed at the end of the declarator. 
*/ - continue; - if (is_first) -- { -- pp_c_ws_string (pp, "__attribute__"); -- pp_c_left_paren (pp); -- pp_c_left_paren (pp); -- is_first = false; -- } -+ { -+ if (std_p) -+ { -+ pp_c_left_bracket (pp); -+ pp_c_left_bracket (pp); -+ } -+ else -+ { -+ pp_c_ws_string (pp, "__attribute__"); -+ pp_c_left_paren (pp); -+ pp_c_left_paren (pp); -+ } -+ is_first = false; -+ } - else -- { -- pp_separate_with (pp, ','); -- } -- pp_tree_identifier (pp, TREE_PURPOSE (a)); -+ pp_separate_with (pp, ','); -+ tree ns; -+ if (std_p && (ns = get_attribute_namespace (a))) -+ { -+ pp_tree_identifier (pp, ns); -+ pp_colon (pp); -+ pp_colon (pp); -+ } -+ pp_tree_identifier (pp, get_attribute_name (a)); - if (TREE_VALUE (a)) -- pp_c_call_argument_list (pp, TREE_VALUE (a)); -+ pp_c_call_argument_list (pp, TREE_VALUE (a)); - } - - if (!is_first) - { -- pp_c_right_paren (pp); -- pp_c_right_paren (pp); -- pp_c_whitespace (pp); -+ if (std_p) -+ { -+ pp_c_right_bracket (pp); -+ pp_c_right_bracket (pp); -+ } -+ else -+ { -+ pp_c_right_paren (pp); -+ pp_c_right_paren (pp); -+ pp_c_whitespace (pp); -+ } - } - } - -diff --git a/gcc/c-family/c-pretty-print.h b/gcc/c-family/c-pretty-print.h -index ba7624dab..6a9090919 100644 ---- a/gcc/c-family/c-pretty-print.h -+++ b/gcc/c-family/c-pretty-print.h -@@ -119,7 +119,6 @@ void pp_c_space_for_pointer_operator (c_pretty_printer *, tree); - /* Declarations. */ - void pp_c_tree_decl_identifier (c_pretty_printer *, tree); - void pp_c_function_definition (c_pretty_printer *, tree); --void pp_c_attributes (c_pretty_printer *, tree); - void pp_c_attributes_display (c_pretty_printer *, tree); - void pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type); - void pp_c_type_qualifier_list (c_pretty_printer *, tree); -diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc -index 1e944ca3f..eeb12a7b4 100644 ---- a/gcc/cp/error.cc -+++ b/gcc/cp/error.cc -@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. 
If not see - #include "internal-fn.h" - #include "gcc-rich-location.h" - #include "cp-name-hint.h" -+#include "attribs.h" - - #define pp_separate_with_comma(PP) pp_cxx_separate_with (PP, ',') - #define pp_separate_with_semicolon(PP) pp_cxx_separate_with (PP, ';') -@@ -896,7 +897,12 @@ dump_type_prefix (cxx_pretty_printer *pp, tree t, int flags) - { - pp_cxx_whitespace (pp); - pp_cxx_left_paren (pp); -- pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); -+ /* If we're dealing with the GNU form of attributes, print this: -+ void (__attribute__((noreturn)) *f) (); -+ If it is the standard [[]] attribute, we'll print the attribute -+ in dump_type_suffix. */ -+ if (!cxx11_attribute_p (TYPE_ATTRIBUTES (sub))) -+ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); - } - if (TYPE_PTR_P (t)) - pp_star (pp); -@@ -1029,6 +1035,14 @@ dump_type_suffix (cxx_pretty_printer *pp, tree t, int flags) - if (tx_safe_fn_type_p (t)) - pp_cxx_ws_string (pp, "transaction_safe"); - dump_exception_spec (pp, TYPE_RAISES_EXCEPTIONS (t), flags); -+ /* If this is the standard [[]] attribute, print -+ void (*)() [[noreturn]]; */ -+ if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) -+ { -+ pp_space (pp); -+ pp_c_attributes_display (pp, TYPE_ATTRIBUTES (t)); -+ pp->padding = pp_before; -+ } - dump_type_suffix (pp, TREE_TYPE (t), flags); - break; - } -diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c -new file mode 100644 -index 000000000..975885462 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c -@@ -0,0 +1,18 @@ -+/* PR c++/106937 */ -+/* { dg-options "-fcf-protection" } */ -+/* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */ -+/* Test printing a pointer to function with attribute. 
*/ -+ -+__attribute__((nocf_check)) typedef void (*FPA1)(); -+[[gnu::nocf_check]] typedef void (*FPA2)(int); -+typedef void (*FP1)(); -+typedef void (*FP2)(int); -+ -+void -+g (FP1 f1, FP2 f2) -+{ -+ FPA1 p1 = f1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } -+ FPA2 p2 = f2; // { dg-warning {aka 'void \(\*\)\(int\) \[\[gnu::nocf_check\]\]'} } -+ FP1 p3 = p1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } -+ FP2 p4 = p2; // { dg-warning {aka 'void \(\*\)\(int\) \[\[gnu::nocf_check\]\]'} } -+} --- -2.33.0 - diff --git a/0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch b/0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch deleted file mode 100644 index 58c1071..0000000 --- a/0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch +++ /dev/null @@ -1,35 +0,0 @@ -From d13efe98cafa04aeb24f8e0f695e648887986228 Mon Sep 17 00:00:00 2001 -From: Wilco Dijkstra -Date: Wed, 7 Dec 2022 14:16:24 +0000 -Subject: [PATCH 139/157] [Backport][SME] AArch64: Fix assert in - aarch64_move_imm [PR108006] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=952c8a1dc6235dc49ab207a7f18f63d2bc97fbc9 - -Ensure we only pass SI/DImode which fixes the assert. - -gcc/ - PR target/108006 - * config/aarch64/aarch64.cc (aarch64_expand_sve_const_vector): - Fix call to aarch64_move_imm to use SI/DI. ---- - gcc/config/aarch64/aarch64.cc | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc -index 0117a3e12..309ecc3d9 100644 ---- a/gcc/config/aarch64/aarch64.cc -+++ b/gcc/config/aarch64/aarch64.cc -@@ -7925,7 +7925,8 @@ aarch64_expand_sve_const_vector (rtx target, rtx src) - /* If the integer can be moved into a general register by a - single instruction, do that and duplicate the result. 
*/ - if (CONST_INT_P (elt_value) -- && aarch64_move_imm (INTVAL (elt_value), elt_mode)) -+ && aarch64_move_imm (INTVAL (elt_value), -+ encoded_bits <= 32 ? SImode : DImode)) - { - elt_value = force_reg (elt_mode, elt_value); - return expand_vector_broadcast (mode, elt_value); --- -2.33.0 - diff --git a/0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch b/0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch deleted file mode 100644 index df5e047..0000000 --- a/0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 071f26ce18db5a09cbae0607b065028a09a856ac Mon Sep 17 00:00:00 2001 -From: Marek Polacek -Date: Tue, 11 Oct 2022 12:51:40 -0400 -Subject: [PATCH 140/157] [Backport][SME] testsuite: Only run -fcf-protection - test on i?86/x86_64 [PR107213] - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=cc694f45087c892e69ebbb177203c708f00b1bc7 - -This test fails on non-i?86/x86_64 targets because on those targets -we get - - error: '-fcf-protection=full' is not supported for this target - -so this patch limits where the test is run. - - PR testsuite/107213 - -gcc/testsuite/ChangeLog: - - * c-c++-common/pointer-to-fn1.c: Only run on i?86/x86_64. ---- - gcc/testsuite/c-c++-common/pointer-to-fn1.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c -index 975885462..e2f948d82 100644 ---- a/gcc/testsuite/c-c++-common/pointer-to-fn1.c -+++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c -@@ -1,4 +1,5 @@ - /* PR c++/106937 */ -+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ - /* { dg-options "-fcf-protection" } */ - /* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */ - /* Test printing a pointer to function with attribute. 
*/ --- -2.33.0 - diff --git a/0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch b/0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch deleted file mode 100644 index d048c48..0000000 --- a/0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch +++ /dev/null @@ -1,113 +0,0 @@ -From 202ebc25e509ae0a2ac7d05c822cf6a8a817e49a Mon Sep 17 00:00:00 2001 -From: Andrew Pinski -Date: Thu, 17 Nov 2022 22:08:07 +0000 -Subject: [PATCH 141/157] [Backport][SME] Fix PRs 106764, 106765, and 107307, - all ICE after invalid re-declaration - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bd0c9d9e706adaeea0d96152daade0a6819a8715 - -The problem here is the gimplifier returns GS_ERROR but -in some cases we don't check that soon enough and try -to do other work which could crash. -So the fix in these two cases is to return GS_ERROR -early if the gimplify_* functions had return GS_ERROR. - -OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. - -Thanks, -Andrew Pinski - -gcc/ChangeLog: - - PR c/106764 - PR c/106765 - PR c/107307 - * gimplify.cc (gimplify_compound_lval): Return GS_ERROR - if gimplify_expr had return GS_ERROR. - (gimplify_call_expr): Likewise. - -gcc/testsuite/ChangeLog: - - PR c/106764 - PR c/106765 - PR c/107307 - * gcc.dg/redecl-19.c: New test. - * gcc.dg/redecl-20.c: New test. - * gcc.dg/redecl-21.c: New test. 
---- - gcc/gimplify.cc | 5 +++++ - gcc/testsuite/gcc.dg/redecl-19.c | 5 +++++ - gcc/testsuite/gcc.dg/redecl-20.c | 9 +++++++++ - gcc/testsuite/gcc.dg/redecl-21.c | 9 +++++++++ - 4 files changed, 28 insertions(+) - create mode 100644 gcc/testsuite/gcc.dg/redecl-19.c - create mode 100644 gcc/testsuite/gcc.dg/redecl-20.c - create mode 100644 gcc/testsuite/gcc.dg/redecl-21.c - -diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc -index 91500e2fb..e9f527850 100644 ---- a/gcc/gimplify.cc -+++ b/gcc/gimplify.cc -@@ -3272,6 +3272,8 @@ gimplify_compound_lval (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, - tret = gimplify_expr (p, pre_p, post_p, is_gimple_min_lval, - fallback | fb_lvalue); - ret = MIN (ret, tret); -+ if (ret == GS_ERROR) -+ return GS_ERROR; - - /* Step 2a: if we have component references we do not support on - registers then make sure the base isn't a register. Of course -@@ -3664,6 +3666,9 @@ gimplify_call_expr (tree *expr_p, gimple_seq *pre_p, bool want_value) - ret = gimplify_expr (&CALL_EXPR_FN (*expr_p), pre_p, NULL, - is_gimple_call_addr, fb_rvalue); - -+ if (ret == GS_ERROR) -+ return GS_ERROR; -+ - nargs = call_expr_nargs (*expr_p); - - /* Get argument types for verification. 
*/ -diff --git a/gcc/testsuite/gcc.dg/redecl-19.c b/gcc/testsuite/gcc.dg/redecl-19.c -new file mode 100644 -index 000000000..cc1068544 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/redecl-19.c -@@ -0,0 +1,5 @@ -+/* We used to ICE in the gimplifier, PR 106764 */ -+/* { dg-do compile } */ -+/* { dg-options "-w" } */ -+(*a)(); // { dg-note "" } -+b(){a()} a; // { dg-error "" } -diff --git a/gcc/testsuite/gcc.dg/redecl-20.c b/gcc/testsuite/gcc.dg/redecl-20.c -new file mode 100644 -index 000000000..07f52115e ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/redecl-20.c -@@ -0,0 +1,9 @@ -+/* We used to ICE in the gimplifier, PR 107307 */ -+// { dg-do compile } -+// { dg-options "-w" } -+void f () -+{ -+ const struct { int a[1]; } b; // { dg-note "" } -+ int *c = b.a; -+ int *b; // { dg-error "" } -+} -diff --git a/gcc/testsuite/gcc.dg/redecl-21.c b/gcc/testsuite/gcc.dg/redecl-21.c -new file mode 100644 -index 000000000..2f2a6548a ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/redecl-21.c -@@ -0,0 +1,9 @@ -+/* We used to ICE in the gimplifier, PR 106765 */ -+/* { dg-do compile } */ -+/* { dg-options "-w" } */ -+struct a { -+ int b -+} c() { -+ struct a a; // { dg-note "" } -+ a.b; -+ d a; // { dg-error "" } --- -2.33.0 - diff --git a/0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch b/0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch deleted file mode 100644 index 5b03a5d..0000000 --- a/0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch +++ /dev/null @@ -1,43 +0,0 @@ -From bc42a8bdab7b2ffeb81441c7c8a9a1215d8502ee Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Thu, 26 Jan 2023 15:51:00 +0000 -Subject: [PATCH 142/157] [Backport][SME] aarch64: Remove expected error for - compound literals - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=96fbe541481fcc7d1a8884fb8dbefd7979eb9543 - -GCC no longer treats empty compound literals as an error -(see 14cfa01755a66afbae2539f8b5796c960ddcecc6). 
- -gcc/testsuite/ - * gcc.target/aarch64/bfloat16_scalar_typecheck.c: Accept empty - compound literals. ---- - gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c -index 7c9188cf2..f4ae68028 100644 ---- a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c -@@ -40,7 +40,7 @@ bfloat16_t footest (bfloat16_t scalar0) - short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ - double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ - -- bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */ -+ bfloat16_t scalar2_1 = {}; - bfloat16_t scalar2_2 = { glob_bfloat }; - bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ - bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -@@ -92,7 +92,7 @@ bfloat16_t footest (bfloat16_t scalar0) - - /* Compound literals. 
*/ - -- (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (bfloat16_t) {}; - (bfloat16_t) { glob_bfloat }; - (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ - (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ --- -2.33.0 - diff --git a/0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch b/0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch deleted file mode 100644 index 85f8c59..0000000 --- a/0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch +++ /dev/null @@ -1,264 +0,0 @@ -From 42bfa9a26205da222cebbe830168b6f0b5e668b4 Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Tue, 12 Jul 2022 12:59:25 +0100 -Subject: [PATCH 143/157] [Backport][SME] aarch64: Remove redundant builtins - code - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e80daf04c8888f527d2fc7f6cbcd1b4c853dcd04 - -aarch64_builtin_vectorized_function handles some built-in functions -that already have equivalent internal functions. This seems to be -redundant now, since the target builtins that it chooses are mapped -to the same optab patterns as the internal functions. - -gcc/ - * config/aarch64/aarch64-builtins.cc - (aarch64_builtin_vectorized_function): Remove handling of - floor, ceil, trunc, round, nearbyint, sqrt, clz and ctz. - -gcc/testsuite/ - * gcc.target/aarch64/vect_unary_1.c: New test. 
---- - gcc/config/aarch64/aarch64-builtins.cc | 32 --- - .../gcc.target/aarch64/vect_unary_1.c | 186 ++++++++++++++++++ - 2 files changed, 186 insertions(+), 32 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_1.c - -diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc -index 37bb3af48..23a84cd53 100644 ---- a/gcc/config/aarch64/aarch64-builtins.cc -+++ b/gcc/config/aarch64/aarch64-builtins.cc -@@ -2653,38 +2653,6 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, - switch (fn) - { - #undef AARCH64_CHECK_BUILTIN_MODE --#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ -- (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) -- CASE_CFN_FLOOR: -- return AARCH64_FIND_FRINT_VARIANT (floor); -- CASE_CFN_CEIL: -- return AARCH64_FIND_FRINT_VARIANT (ceil); -- CASE_CFN_TRUNC: -- return AARCH64_FIND_FRINT_VARIANT (btrunc); -- CASE_CFN_ROUND: -- return AARCH64_FIND_FRINT_VARIANT (round); -- CASE_CFN_NEARBYINT: -- return AARCH64_FIND_FRINT_VARIANT (nearbyint); -- CASE_CFN_SQRT: -- return AARCH64_FIND_FRINT_VARIANT (sqrt); --#undef AARCH64_CHECK_BUILTIN_MODE --#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ -- (out_mode == V##C##SImode && in_mode == V##C##N##Imode) -- CASE_CFN_CLZ: -- { -- if (AARCH64_CHECK_BUILTIN_MODE (4, S)) -- return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; -- return NULL_TREE; -- } -- CASE_CFN_CTZ: -- { -- if (AARCH64_CHECK_BUILTIN_MODE (2, S)) -- return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si]; -- else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) -- return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si]; -- return NULL_TREE; -- } --#undef AARCH64_CHECK_BUILTIN_MODE - #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) - CASE_CFN_IFLOOR: -diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c -new file mode 100644 -index 
000000000..8516808be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c -@@ -0,0 +1,186 @@ -+/* { dg-options "-O3 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "" } } */ -+ -+#include -+ -+#define TEST2(OUT, NAME, IN) \ -+OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ -+test2_##OUT##_##NAME##_##IN (float dummy, \ -+ IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ -+{ \ -+ OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ -+ x[0] = __builtin_##NAME (y[0]); \ -+ x[1] = __builtin_##NAME (y[1]); \ -+ return x; \ -+} \ -+ -+#define TEST4(OUT, NAME, IN) \ -+OUT __attribute__((vector_size(16))) \ -+test4_##OUT##_##NAME##_##IN (float dummy, \ -+ IN __attribute__((vector_size(16))) y) \ -+{ \ -+ OUT __attribute__((vector_size(16))) x; \ -+ x[0] = __builtin_##NAME (y[0]); \ -+ x[1] = __builtin_##NAME (y[1]); \ -+ x[2] = __builtin_##NAME (y[2]); \ -+ x[3] = __builtin_##NAME (y[3]); \ -+ return x; \ -+} \ -+ -+/* -+** test2_float_truncf_float: -+** frintz v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, truncf, float) -+ -+/* -+** test2_double_trunc_double: -+** frintz v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, trunc, double) -+ -+/* -+** test4_float_truncf_float: -+** frintz v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, truncf, float) -+ -+/* -+** test2_float_roundf_float: -+** frinta v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, roundf, float) -+ -+/* -+** test2_double_round_double: -+** frinta v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, round, double) -+ -+/* -+** test4_float_roundf_float: -+** frinta v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, roundf, float) -+ -+/* -+** test2_float_nearbyintf_float: -+** frinti v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, nearbyintf, float) -+ -+/* -+** test2_double_nearbyint_double: -+** frinti v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, nearbyint, double) -+ -+/* -+** test4_float_nearbyintf_float: -+** frinti v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, nearbyintf, float) -+ -+/* -+** 
test2_float_floorf_float: -+** frintm v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, floorf, float) -+ -+/* -+** test2_double_floor_double: -+** frintm v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, floor, double) -+ -+/* -+** test4_float_floorf_float: -+** frintm v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, floorf, float) -+ -+/* -+** test2_float_ceilf_float: -+** frintp v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, ceilf, float) -+ -+/* -+** test2_double_ceil_double: -+** frintp v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, ceil, double) -+ -+/* -+** test4_float_ceilf_float: -+** frintp v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, ceilf, float) -+ -+/* -+** test2_float_rintf_float: -+** frintx v0.2s, v1.2s -+** ret -+*/ -+TEST2 (float, rintf, float) -+ -+/* -+** test2_double_rint_double: -+** frintx v0.2d, v1.2d -+** ret -+*/ -+TEST2 (double, rint, double) -+ -+/* -+** test4_float_rintf_float: -+** frintx v0.4s, v1.4s -+** ret -+*/ -+TEST4 (float, rintf, float) -+ -+/* -+** test2_int_clz_int: -+** clz v0.2s, v1.2s -+** ret -+*/ -+TEST2 (int, clz, int) -+ -+/* -+** test4_int_clz_int: -+** clz v0.4s, v1.4s -+** ret -+*/ -+TEST4 (int, clz, int) -+ -+/* -+** test2_int_ctz_int: -+** rev32 (v[0-9]+).8b, v1.8b -+** rbit (v[0-9]+).8b, \1.8b -+** clz v0.2s, \2.2s -+** ret -+*/ -+TEST2 (int, ctz, int) -+ -+/* -+** test4_int_ctz_int: -+** rev32 (v[0-9]+).16b, v1.16b -+** rbit (v[0-9]+).16b, \1.16b -+** clz v0.4s, \2.4s -+** ret -+*/ -+TEST4 (int, ctz, int) --- -2.33.0 - diff --git a/0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch b/0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch deleted file mode 100644 index 0e4b80b..0000000 --- a/0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch +++ /dev/null @@ -1,63 +0,0 @@ -From a1ba437195286af3389ba9f2d43b8cb6c73ba3d8 Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Thu, 12 Oct 2023 15:55:58 +0100 -Subject: [PATCH 144/157] [Backport][SME] AArch64: Fix Armv9-a warnings that - get emitted whenever a 
ACLE header is used. - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=de593b3cffdc9c030c5e697ad9092b1b085dabc4 - -At the moment, trying to use -march=armv9-a with any ACLE header such as -arm_neon.h results in rows and rows of warnings saying: - -: warning: "__ARM_ARCH" redefined -: note: this is the location of the previous definition - -This is obviously not useful and happens because the header was defined at -__ARM_ARCH == 8 and the commandline changes it. - -The Arm port solves this by undef the macro during argument processing and we do -the same on AArch64 for the majority of macros. However we define this macro -using a different helper which requires the manual undef. - -Thanks, -Tamar - -gcc/ChangeLog: - - * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Add undef. - -gcc/testsuite/ChangeLog: - - * gcc.target/aarch64/armv9_warning.c: New test. ---- - gcc/config/aarch64/aarch64-c.cc | 1 + - gcc/testsuite/gcc.target/aarch64/armv9_warning.c | 5 +++++ - 2 files changed, 6 insertions(+) - create mode 100644 gcc/testsuite/gcc.target/aarch64/armv9_warning.c - -diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc -index 745719d8b..2d2ac42c4 100644 ---- a/gcc/config/aarch64/aarch64-c.cc -+++ b/gcc/config/aarch64/aarch64-c.cc -@@ -129,6 +129,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - { - aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile); - -+ cpp_undef (pfile, "__ARM_ARCH"); - builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 
9 : 8); - - builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", -diff --git a/gcc/testsuite/gcc.target/aarch64/armv9_warning.c b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c -new file mode 100644 -index 000000000..35690d5bc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/armv9_warning.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv9-a -Wpedantic -Werror" } */ -+ -+#include -+ --- -2.33.0 - diff --git a/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch b/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch deleted file mode 100644 index aab1bb6..0000000 --- a/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch +++ /dev/null @@ -1,184 +0,0 @@ -From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001 -From: Roger Sayle -Date: Fri, 27 May 2022 08:57:46 +0100 -Subject: [PATCH 145/157] [Backport][SME] Canonicalize X&-Y as X*Y in match.pd - when Y is [0,1]. - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655 - -"For every pessimization, there's an equal and opposite optimization". - -In the review of my original patch for PR middle-end/98865, Richard -Biener pointed out that match.pd shouldn't be transforming X*Y into -X&-Y as the former is considered cheaper by tree-ssa's cost model -(operator count). A corollary of this is that we should instead be -transforming X&-Y into the cheaper X*Y as a preferred canonical form -(especially as RTL expansion now intelligently selects the appropriate -implementation based on the target's costs). - -With this patch we now generate identical code for: -int foo(int x, int y) { return -(x&1) & y; } -int bar(int x, int y) { return (x&1) * y; } - -specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2, -but both use and/mul with -Os. 
- -One minor wrinkle/improvement is that this patch includes three -additional optimizations (that account for the change in canonical -form) to continue to optimize PR92834 and PR94786. - -2022-05-27 Roger Sayle - -gcc/ChangeLog - * match.pd (match_zero_one_valued_p): New predicate. - (mult @0 @1): Use zero_one_valued_p for optimization to the - expression "bit_and @0 @1". - (bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR. - (plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform. - (minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise. - (bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise. - Remove three redundant transforms obsoleted by the three above. - -gcc/testsuite/ChangeLog - * gcc.dg/pr98865.c: New test case. ---- - gcc/match.pd | 86 ++++++++++++++++------------------ - gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++ - 2 files changed, 55 insertions(+), 45 deletions(-) - create mode 100644 gcc/testsuite/gcc.dg/pr98865.c - -diff --git a/gcc/match.pd b/gcc/match.pd -index aee58e47b..6d3165bcd 100644 ---- a/gcc/match.pd -+++ b/gcc/match.pd -@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - || !COMPLEX_FLOAT_TYPE_P (type))) - (negate @0))) - --/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */ --(simplify -- (mult SSA_NAME@1 SSA_NAME@2) -- (if (INTEGRAL_TYPE_P (type) -- && get_nonzero_bits (@1) == 1 -- && get_nonzero_bits (@2) == 1) -- (bit_and @1 @2))) -- - /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...}, - unless the target has native support for the former but not the latter. */ - (simplify -@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (bit_not (bit_not @0)) - @0) - -+(match zero_one_valued_p -+ @0 -+ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1))) -+(match zero_one_valued_p -+ truth_valued_p@0) -+ -+/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. 
*/ -+(simplify -+ (mult zero_one_valued_p@0 zero_one_valued_p@1) -+ (if (INTEGRAL_TYPE_P (type)) -+ (bit_and @0 @1))) -+ -+/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */ -+(simplify -+ (bit_and:c (convert? (negate zero_one_valued_p@0)) @1) -+ (if (INTEGRAL_TYPE_P (type) -+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) -+ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE -+ && !TYPE_UNSIGNED (TREE_TYPE (@0))) -+ (mult (convert @0) @1))) -+ - /* Convert ~ (-A) to A - 1. */ - (simplify - (bit_not (convert? (negate @0))) -@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (cmp @0 (minmax:c @0 @1)) - { constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } )) - --/* Undo fancy way of writing max/min or other ?: expressions, -- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a. -+/* Undo fancy ways of writing max/min or other ?: expressions, like -+ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a. - People normally use ?: and that is what we actually try to optimize. */ --(for cmp (simple_comparison) -- (simplify -- (minus @0 (bit_and:c (minus @0 @1) -- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) -- (if (INTEGRAL_TYPE_P (type) -- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) -- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE -- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) -- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) -- || !TYPE_UNSIGNED (TREE_TYPE (@4))) -- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -- (cond (cmp @2 @3) @1 @0))) -- (simplify -- (plus:c @0 (bit_and:c (minus @1 @0) -- (convert? (negate@4 (convert? 
(cmp@5 @2 @3)))))) -- (if (INTEGRAL_TYPE_P (type) -- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) -- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE -- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) -- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) -- || !TYPE_UNSIGNED (TREE_TYPE (@4))) -- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -- (cond (cmp @2 @3) @1 @0))) -- /* Similarly with ^ instead of - though in that case with :c. */ -- (simplify -- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1) -- (convert? (negate@4 (convert? (cmp@5 @2 @3)))))) -- (if (INTEGRAL_TYPE_P (type) -- && INTEGRAL_TYPE_P (TREE_TYPE (@4)) -- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE -- && INTEGRAL_TYPE_P (TREE_TYPE (@5)) -- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) -- || !TYPE_UNSIGNED (TREE_TYPE (@4))) -- && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -- (cond (cmp @2 @3) @1 @0)))) -+/* Transform A + (B-A)*cmp into cmp ? B : A. */ -+(simplify -+ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2)) -+ (if (INTEGRAL_TYPE_P (type) -+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -+ (cond (convert:boolean_type_node @2) @1 @0))) -+/* Transform A - (A-B)*cmp into cmp ? B : A. */ -+(simplify -+ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2)) -+ (if (INTEGRAL_TYPE_P (type) -+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -+ (cond (convert:boolean_type_node @2) @1 @0))) -+/* Transform A ^ (A^B)*cmp into cmp ? B : A. */ -+(simplify -+ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2)) -+ (if (INTEGRAL_TYPE_P (type) -+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) -+ (cond (convert:boolean_type_node @2) @1 @0))) - - /* Simplifications of shift and rotates. 
*/ - -diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c -new file mode 100644 -index 000000000..95f727033 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/pr98865.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-optimized" } */ -+ -+int foo(int x, int y) -+{ -+ return -(x&1) & y; -+} -+ -+int bar(int x, int y) -+{ -+ return (x&1) * y; -+} -+ -+/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */ --- -2.33.0 - diff --git a/0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch b/0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch deleted file mode 100644 index 229b797..0000000 --- a/0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch +++ /dev/null @@ -1,417 +0,0 @@ -From a8f10b4b73c2624599765edf7ff19d53eca15135 Mon Sep 17 00:00:00 2001 -From: Tamar Christina -Date: Mon, 12 Dec 2022 15:16:50 +0000 -Subject: [PATCH 146/157] [Backport][SME] middle-end: Add new tbranch optab to - add support for bit-test-and-branch operations - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc582d2ef32e2d3723c68d111f4e49607631f34d - -This adds a new test-and-branch optab that can be used to do a conditional test -of a bit and branch. This is similar to the cbranch optab but instead can -test any arbitrary bit inside the register. - -This patch recognizes boolean comparisons and single bit mask tests. - -gcc/ChangeLog: - - * dojump.cc (do_jump): Pass along value. - (do_jump_by_parts_greater_rtx): Likewise. - (do_jump_by_parts_zero_rtx): Likewise. - (do_jump_by_parts_equality_rtx): Likewise. - (do_compare_rtx_and_jump): Likewise. - (do_compare_and_jump): Likewise. - * dojump.h (do_compare_rtx_and_jump): New. - * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check. - (validate_test_and_branch): New. - (emit_cmp_and_jump_insns): Optiobally take a value, and when value is - supplied then check if it's suitable for tbranch. 
- * optabs.def (tbranch_eq$a4, tbranch_ne$a4): New. - * doc/md.texi (tbranch_@var{op}@var{mode}4): Document it. - * optabs.h (emit_cmp_and_jump_insns): New. - * tree.h (tree_zero_one_valued_p): New. ---- - gcc/doc/md.texi | 7 +++ - gcc/dojump.cc | 52 +++++++++++++++------- - gcc/dojump.h | 4 ++ - gcc/optabs.cc | 114 ++++++++++++++++++++++++++++++++++++++++++++---- - gcc/optabs.def | 2 + - gcc/optabs.h | 4 ++ - gcc/tree.h | 1 + - 7 files changed, 159 insertions(+), 25 deletions(-) - -diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi -index c0cf0ec64..2193900e7 100644 ---- a/gcc/doc/md.texi -+++ b/gcc/doc/md.texi -@@ -7299,6 +7299,13 @@ case, you can and should make operand 1's predicate reject some operators - in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether - from the machine description. - -+@cindex @code{tbranch_@var{op}@var{mode}3} instruction pattern -+@item @samp{tbranch_@var{op}@var{mode}3} -+Conditional branch instruction combined with a bit test-and-compare -+instruction. Operand 0 is the operand of the comparison. Operand 1 is the bit -+position of Operand 1 to test. Operand 3 is the @code{code_label} to jump to. -+@var{op} is one of @var{eq} or @var{ne}. -+ - @cindex @code{cbranch@var{mode}4} instruction pattern - @item @samp{cbranch@var{mode}4} - Conditional branch instruction combined with a compare instruction. -diff --git a/gcc/dojump.cc b/gcc/dojump.cc -index 0c880d653..604b28537 100644 ---- a/gcc/dojump.cc -+++ b/gcc/dojump.cc -@@ -621,7 +621,7 @@ do_jump (tree exp, rtx_code_label *if_false_label, - } - do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)), - NE, TYPE_UNSIGNED (TREE_TYPE (exp)), -- GET_MODE (temp), NULL_RTX, -+ exp, GET_MODE (temp), NULL_RTX, - if_false_label, if_true_label, prob); - } - -@@ -689,7 +689,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, - - /* All but high-order word must be compared as unsigned. 
*/ - do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0), -- word_mode, NULL_RTX, NULL, if_true_label, -+ NULL, word_mode, NULL_RTX, NULL, if_true_label, - prob); - - /* Emit only one comparison for 0. Do not emit the last cond jump. */ -@@ -697,8 +697,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, - break; - - /* Consider lower words only if these are equal. */ -- do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode, -- NULL_RTX, NULL, if_false_label, -+ do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL, -+ word_mode, NULL_RTX, NULL, if_false_label, - prob.invert ()); - } - -@@ -757,7 +757,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, - - if (part != 0) - { -- do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode, -+ do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode, - NULL_RTX, if_false_label, if_true_label, prob); - return; - } -@@ -768,7 +768,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0, - - for (i = 0; i < nwords; i++) - do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), -- const0_rtx, EQ, 1, word_mode, NULL_RTX, -+ const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX, - if_false_label, NULL, prob); - - if (if_true_label) -@@ -811,8 +811,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1, - - for (i = 0; i < nwords; i++) - do_compare_rtx_and_jump (operand_subword_force (op0, i, mode), -- operand_subword_force (op1, i, mode), -- EQ, 0, word_mode, NULL_RTX, -+ operand_subword_force (op1, i, mode), -+ EQ, 0, NULL, word_mode, NULL_RTX, - if_false_label, NULL, prob); - - if (if_true_label) -@@ -964,6 +964,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, - rtx_code_label *if_false_label, - rtx_code_label *if_true_label, - profile_probability prob) -+{ -+ do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size, -+ if_false_label, if_true_label, prob); 
-+} -+ -+/* Like do_compare_and_jump but expects the values to compare as two rtx's. -+ The decision as to signed or unsigned comparison must be made by the caller. -+ -+ If MODE is BLKmode, SIZE is an RTX giving the size of the objects being -+ compared. */ -+ -+void -+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, -+ tree val, machine_mode mode, rtx size, -+ rtx_code_label *if_false_label, -+ rtx_code_label *if_true_label, -+ profile_probability prob) - { - rtx tem; - rtx_code_label *dummy_label = NULL; -@@ -1179,8 +1196,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, - } - else - dest_label = if_false_label; -- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, -- size, dest_label, NULL, first_prob); -+ -+ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, -+ val, mode, size, dest_label, NULL, -+ first_prob); - } - /* For !and_them we want to split: - if (x) goto t; // prob; -@@ -1194,8 +1213,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, - else - { - profile_probability first_prob = prob.split (cprob); -- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode, -- size, NULL, if_true_label, first_prob); -+ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, -+ val, mode, size, NULL, -+ if_true_label, first_prob); - if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump)) - { - /* x != y can be split into x unord y || x ltgt y -@@ -1217,7 +1237,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp, - } - } - -- emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, -+ emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val, - if_true_label, prob); - } - -@@ -1291,9 +1311,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code, - op1 = new_op1; - } - -- do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, -- ((mode == BLKmode) -- ? 
expr_size (treeop0) : NULL_RTX), -+ do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode, -+ ((mode == BLKmode) -+ ? expr_size (treeop0) : NULL_RTX), - if_false_label, if_true_label, prob); - } - -diff --git a/gcc/dojump.h b/gcc/dojump.h -index e379cceb3..d1d79c490 100644 ---- a/gcc/dojump.h -+++ b/gcc/dojump.h -@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label, - extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *, - profile_probability); - -+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree, -+ machine_mode, rtx, rtx_code_label *, -+ rtx_code_label *, profile_probability); -+ - extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, - machine_mode, rtx, rtx_code_label *, - rtx_code_label *, profile_probability); -diff --git a/gcc/optabs.cc b/gcc/optabs.cc -index 3d8fa3abd..b441137de 100644 ---- a/gcc/optabs.cc -+++ b/gcc/optabs.cc -@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see - #include "libfuncs.h" - #include "internal-fn.h" - #include "langhooks.h" -+#include "gimple.h" -+#include "ssa.h" - - static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *, - machine_mode *); -@@ -4621,7 +4623,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode, - - static void - emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, -- profile_probability prob) -+ direct_optab cmp_optab, profile_probability prob, -+ bool test_branch) - { - machine_mode optab_mode; - enum mode_class mclass; -@@ -4630,12 +4633,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, - - mclass = GET_MODE_CLASS (mode); - optab_mode = (mclass == MODE_CC) ? 
CCmode : mode; -- icode = optab_handler (cbranch_optab, optab_mode); -+ icode = optab_handler (cmp_optab, optab_mode); - - gcc_assert (icode != CODE_FOR_nothing); -- gcc_assert (insn_operand_matches (icode, 0, test)); -- insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), -- XEXP (test, 1), label)); -+ gcc_assert (test_branch || insn_operand_matches (icode, 0, test)); -+ if (test_branch) -+ insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0), -+ XEXP (test, 1), label)); -+ else -+ insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0), -+ XEXP (test, 1), label)); -+ - if (prob.initialized_p () - && profile_status_for_fn (cfun) != PROFILE_ABSENT - && insn -@@ -4645,6 +4653,68 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, - add_reg_br_prob_note (insn, prob); - } - -+/* PTEST points to a comparison that compares its first operand with zero. -+ Check to see if it can be performed as a bit-test-and-branch instead. -+ On success, return the instruction that performs the bit-test-and-branch -+ and replace the second operand of *PTEST with the bit number to test. -+ On failure, return CODE_FOR_nothing and leave *PTEST unchanged. -+ -+ Note that the comparison described by *PTEST should not be taken -+ literally after a successful return. *PTEST is just a convenient -+ place to store the two operands of the bit-and-test. -+ -+ VAL must contain the original tree expression for the first operand -+ of *PTEST. 
*/ -+ -+static enum insn_code -+validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res) -+{ -+ if (!val || TREE_CODE (val) != SSA_NAME) -+ return CODE_FOR_nothing; -+ -+ machine_mode mode = TYPE_MODE (TREE_TYPE (val)); -+ rtx test = *ptest; -+ direct_optab optab; -+ -+ if (GET_CODE (test) == EQ) -+ optab = tbranch_eq_optab; -+ else if (GET_CODE (test) == NE) -+ optab = tbranch_ne_optab; -+ else -+ return CODE_FOR_nothing; -+ -+ *res = optab; -+ -+ /* If the target supports the testbit comparison directly, great. */ -+ auto icode = direct_optab_handler (optab, mode); -+ if (icode == CODE_FOR_nothing) -+ return icode; -+ -+ if (tree_zero_one_valued_p (val)) -+ { -+ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0; -+ XEXP (test, 1) = gen_int_mode (pos, mode); -+ *ptest = test; -+ *pmode = mode; -+ return icode; -+ } -+ -+ wide_int wcst = get_nonzero_bits (val); -+ if (wcst == -1) -+ return CODE_FOR_nothing; -+ -+ int bitpos; -+ -+ if ((bitpos = wi::exact_log2 (wcst)) == -1) -+ return CODE_FOR_nothing; -+ -+ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos; -+ XEXP (test, 1) = gen_int_mode (pos, mode); -+ *ptest = test; -+ *pmode = mode; -+ return icode; -+} -+ - /* Generate code to compare X with Y so that the condition codes are - set and to jump to LABEL if the condition is true. If X is a - constant and Y is not a constant, then the comparison is swapped to -@@ -4662,11 +4732,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label, - It will be potentially converted into an unsigned variant based on - UNSIGNEDP to select a proper jump instruction. - -- PROB is the probability of jumping to LABEL. */ -+ PROB is the probability of jumping to LABEL. If the comparison is against -+ zero then VAL contains the expression from which the non-zero RTL is -+ derived. 
*/ - - void - emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, -- machine_mode mode, int unsignedp, rtx label, -+ machine_mode mode, int unsignedp, tree val, rtx label, - profile_probability prob) - { - rtx op0 = x, op1 = y; -@@ -4691,10 +4763,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, - - prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN, - &test, &mode); -- emit_cmp_and_jump_insn_1 (test, mode, label, prob); -+ -+ /* Check if we're comparing a truth type with 0, and if so check if -+ the target supports tbranch. */ -+ machine_mode tmode = mode; -+ direct_optab optab; -+ if (op1 == CONST0_RTX (GET_MODE (op1)) -+ && validate_test_and_branch (val, &test, &tmode, -+ &optab) != CODE_FOR_nothing) -+ { -+ emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true); -+ return; -+ } -+ -+ emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false); - } - -- -+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown. */ -+ -+void -+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size, -+ machine_mode mode, int unsignedp, rtx label, -+ profile_probability prob) -+{ -+ emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL, -+ label, prob); -+} -+ -+ - /* Emit a library call comparison between floating point X and Y. - COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.). 
*/ - -diff --git a/gcc/optabs.def b/gcc/optabs.def -index 801310eba..dbf529434 100644 ---- a/gcc/optabs.def -+++ b/gcc/optabs.def -@@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a") - OPTAB_D (reload_out_optab, "reload_out$a") - - OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE) -+OPTAB_D (tbranch_eq_optab, "tbranch_eq$a3") -+OPTAB_D (tbranch_ne_optab, "tbranch_ne$a3") - OPTAB_D (addcc_optab, "add$acc") - OPTAB_D (negcc_optab, "neg$acc") - OPTAB_D (notcc_optab, "not$acc") -diff --git a/gcc/optabs.h b/gcc/optabs.h -index cfd7c742d..cd55604bc 100644 ---- a/gcc/optabs.h -+++ b/gcc/optabs.h -@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, - machine_mode, int, rtx, - profile_probability prob - = profile_probability::uninitialized ()); -+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx, -+ machine_mode, int, tree, rtx, -+ profile_probability prob -+ = profile_probability::uninitialized ()); - - /* Generate code to indirectly jump to a location given in the rtx LOC. 
*/ - extern void emit_indirect_jump (rtx); -diff --git a/gcc/tree.h b/gcc/tree.h -index 3ff7732dc..07af584d6 100644 ---- a/gcc/tree.h -+++ b/gcc/tree.h -@@ -4627,6 +4627,7 @@ extern tree signed_or_unsigned_type_for (int, tree); - extern tree signed_type_for (tree); - extern tree unsigned_type_for (tree); - extern bool is_truth_type_for (tree, tree); -+extern bool tree_zero_one_valued_p (tree); - extern tree truth_type_for (tree); - extern tree build_pointer_type_for_mode (tree, machine_mode, bool); - extern tree build_pointer_type (tree); --- -2.33.0 - diff --git a/0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch b/0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch deleted file mode 100644 index fdefe48..0000000 --- a/0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch +++ /dev/null @@ -1,110 +0,0 @@ -From fe64cc72e6221cf05e40bc868287bd1fcf07479f Mon Sep 17 00:00:00 2001 -From: Richard Sandiford -Date: Mon, 6 Nov 2023 10:49:58 +0000 -Subject: [PATCH 147/157] [Backport][SME] explow: Allow dynamic allocations - after vregs - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a7a45364dddef399bfb550235166df51108a3142 - -This patch allows allocate_dynamic_stack_space to be called before -or after virtual registers have been instantiated. It uses the -same approach as allocate_stack_local, which already supported this. - -gcc/ - * function.h (get_stack_dynamic_offset): Declare. - * function.cc (get_stack_dynamic_offset): New function, - split out from... - (get_stack_dynamic_offset): ...here. - * explow.cc (allocate_dynamic_stack_space): Handle calls made - after virtual registers have been instantiated. 
---- - gcc/explow.cc | 10 +++++++--- - gcc/function.cc | 12 +++++++++++- - gcc/function.h | 1 + - 3 files changed, 19 insertions(+), 4 deletions(-) - -diff --git a/gcc/explow.cc b/gcc/explow.cc -index d8aa75ee9..124e8f49e 100644 ---- a/gcc/explow.cc -+++ b/gcc/explow.cc -@@ -1375,12 +1375,16 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, - HOST_WIDE_INT stack_usage_size = -1; - rtx_code_label *final_label; - rtx final_target, target; -+ rtx addr = (virtuals_instantiated -+ ? plus_constant (Pmode, stack_pointer_rtx, -+ get_stack_dynamic_offset ()) -+ : virtual_stack_dynamic_rtx); - - /* If we're asking for zero bytes, it doesn't matter what we point - to since we can't dereference it. But return a reasonable - address anyway. */ - if (size == const0_rtx) -- return virtual_stack_dynamic_rtx; -+ return addr; - - /* Otherwise, show we're calling alloca or equivalent. */ - cfun->calls_alloca = 1; -@@ -1532,7 +1536,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, - poly_int64 saved_stack_pointer_delta; - - if (!STACK_GROWS_DOWNWARD) -- emit_move_insn (target, virtual_stack_dynamic_rtx); -+ emit_move_insn (target, force_operand (addr, target)); - - /* Check stack bounds if necessary. */ - if (crtl->limit_stack) -@@ -1575,7 +1579,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, - stack_pointer_delta = saved_stack_pointer_delta; - - if (STACK_GROWS_DOWNWARD) -- emit_move_insn (target, virtual_stack_dynamic_rtx); -+ emit_move_insn (target, force_operand (addr, target)); - } - - suppress_reg_args_size = false; -diff --git a/gcc/function.cc b/gcc/function.cc -index f4fc211a0..e62b2a8d6 100644 ---- a/gcc/function.cc -+++ b/gcc/function.cc -@@ -1945,6 +1945,16 @@ instantiate_decls (tree fndecl) - vec_free (cfun->local_decls); - } - -+/* Return the value of STACK_DYNAMIC_OFFSET for the current function. -+ This is done through a function wrapper so that the macro sees a -+ predictable set of included files. 
*/ -+ -+poly_int64 -+get_stack_dynamic_offset () -+{ -+ return STACK_DYNAMIC_OFFSET (current_function_decl); -+} -+ - /* Pass through the INSNS of function FNDECL and convert virtual register - references to hard register references. */ - -@@ -1956,7 +1966,7 @@ instantiate_virtual_regs (void) - /* Compute the offsets to use for this function. */ - in_arg_offset = FIRST_PARM_OFFSET (current_function_decl); - var_offset = targetm.starting_frame_offset (); -- dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl); -+ dynamic_offset = get_stack_dynamic_offset (); - out_arg_offset = STACK_POINTER_OFFSET; - #ifdef FRAME_POINTER_CFA_OFFSET - cfa_offset = FRAME_POINTER_CFA_OFFSET (current_function_decl); -diff --git a/gcc/function.h b/gcc/function.h -index 4e8131706..a5846465a 100644 ---- a/gcc/function.h -+++ b/gcc/function.h -@@ -711,6 +711,7 @@ extern vec convert_jumps_to_returns (basic_block last_bb, bool simple_p, - extern basic_block emit_return_for_exit (edge exit_fallthru_edge, - bool simple_p); - extern void reposition_prologue_and_epilogue_notes (void); -+extern poly_int64 get_stack_dynamic_offset (); - - /* Returns the name of the current function. 
*/ - extern const char *fndecl_name (tree); --- -2.33.0 - diff --git a/0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch b/0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch deleted file mode 100644 index 93a7e3a..0000000 --- a/0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch +++ /dev/null @@ -1,225 +0,0 @@ -From 6cc7bcadadbc2521a2db4c02adfe066d805e37ef Mon Sep 17 00:00:00 2001 -From: Giuliano Belinassi -Date: Fri, 6 May 2022 23:37:52 -0300 -Subject: [PATCH 148/157] [Backport][SME] PR105169 Fix references to discarded - sections - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7a3f38a966a52893fb5bae301a1a3d56961358fb - -When -fpatchable-function-entry= is enabled, certain C++ codes fails to -link because of generated references to discarded sections in -__patchable_function_entry section. This commit fixes this problem by -puting those references in a COMDAT section. - -2022-05-06 Giuliano Belinassi - -gcc/ChangeLog - PR c++/105169 - * targhooks.cc (default_print_patchable_function_entry_1): Handle COMDAT case. - * varasm.cc (switch_to_comdat_section): New - (handle_vtv_comdat_section): Call switch_to_comdat_section. - * varasm.h: Declare switch_to_comdat_section. - -gcc/testsuite/ChangeLog -2022-05-06 Giuliano Belinassi - - PR c++/105169 - * g++.dg/modules/pr105169.h: New file. - * g++.dg/modules/pr105169_a.C: New test. - * g++.dg/modules/pr105169_b.C: New file. 
---- - gcc/targhooks.cc | 8 ++++-- - gcc/testsuite/g++.dg/modules/pr105169.h | 22 +++++++++++++++ - gcc/testsuite/g++.dg/modules/pr105169_a.C | 25 +++++++++++++++++ - gcc/testsuite/g++.dg/modules/pr105169_b.C | 12 +++++++++ - gcc/varasm.cc | 33 ++++++++++++++--------- - gcc/varasm.h | 2 ++ - 6 files changed, 87 insertions(+), 15 deletions(-) - create mode 100644 gcc/testsuite/g++.dg/modules/pr105169.h - create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_a.C - create mode 100644 gcc/testsuite/g++.dg/modules/pr105169_b.C - -diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc -index c88afa5db..175a0e18a 100644 ---- a/gcc/targhooks.cc -+++ b/gcc/targhooks.cc -@@ -2019,8 +2019,12 @@ default_print_patchable_function_entry_1 (FILE *file, - patch_area_number++; - ASM_GENERATE_INTERNAL_LABEL (buf, "LPFE", patch_area_number); - -- switch_to_section (get_section ("__patchable_function_entries", -- flags, current_function_decl)); -+ section *sect = get_section ("__patchable_function_entries", -+ flags, current_function_decl); -+ if (HAVE_COMDAT_GROUP && DECL_COMDAT_GROUP (current_function_decl)) -+ switch_to_comdat_section (sect, current_function_decl); -+ else -+ switch_to_section (sect); - assemble_align (POINTER_SIZE); - fputs (asm_op, file); - assemble_name_raw (file, buf); -diff --git a/gcc/testsuite/g++.dg/modules/pr105169.h b/gcc/testsuite/g++.dg/modules/pr105169.h -new file mode 100644 -index 000000000..a7e762705 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/modules/pr105169.h -@@ -0,0 +1,22 @@ -+class IPXAddressClass -+{ -+public: -+ IPXAddressClass(void); -+}; -+ -+class WinsockInterfaceClass -+{ -+ -+public: -+ WinsockInterfaceClass(void); -+ -+ virtual void Set_Broadcast_Address(void*){}; -+ -+ virtual int Get_Protocol(void) -+ { -+ return 0; -+ }; -+ -+protected: -+}; -+ -diff --git a/gcc/testsuite/g++.dg/modules/pr105169_a.C b/gcc/testsuite/g++.dg/modules/pr105169_a.C -new file mode 100644 -index 000000000..66dc4b790 ---- /dev/null -+++ 
b/gcc/testsuite/g++.dg/modules/pr105169_a.C -@@ -0,0 +1,25 @@ -+/* { dg-module-do link } */ -+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ -+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ -+ -+/* This test is in the "modules" package because it supports multiple files -+ linkage. */ -+ -+#include "pr105169.h" -+ -+WinsockInterfaceClass* PacketTransport; -+ -+IPXAddressClass::IPXAddressClass(void) -+{ -+} -+ -+int function() -+{ -+ return PacketTransport->Get_Protocol(); -+} -+ -+int main() -+{ -+ IPXAddressClass ipxaddr; -+ return 0; -+} -diff --git a/gcc/testsuite/g++.dg/modules/pr105169_b.C b/gcc/testsuite/g++.dg/modules/pr105169_b.C -new file mode 100644 -index 000000000..5f8b00dfe ---- /dev/null -+++ b/gcc/testsuite/g++.dg/modules/pr105169_b.C -@@ -0,0 +1,12 @@ -+/* { dg-module-do link } */ -+/* { dg-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ -+/* { dg-additional-options "-std=c++11 -fpatchable-function-entry=1 -O2" } */ -+ -+/* This test is in the "modules" package because it supports multiple files -+ linkage. */ -+ -+#include "pr105169.h" -+ -+WinsockInterfaceClass::WinsockInterfaceClass(void) -+{ -+} -diff --git a/gcc/varasm.cc b/gcc/varasm.cc -index 3f69b47a7..bae935694 100644 ---- a/gcc/varasm.cc -+++ b/gcc/varasm.cc -@@ -8459,25 +8459,21 @@ default_asm_output_ident_directive (const char *ident_str) - fprintf (asm_out_file, "%s\"%s\"\n", ident_asm_op, ident_str); - } - -- --/* This function ensures that vtable_map variables are not only -- in the comdat section, but that each variable has its own unique -- comdat name. Without this the variables end up in the same section -- with a single comdat name. -- -+/* Switch to a COMDAT section with COMDAT name of decl. -+ - FIXME: resolve_unique_section needs to deal better with - decls with both DECL_SECTION_NAME and DECL_ONE_ONLY. Once - that is fixed, this if-else statement can be replaced with - a single call to "switch_to_section (sect)". 
*/ - --static void --handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) -+void -+switch_to_comdat_section (section *sect, tree decl) - { - #if defined (OBJECT_FORMAT_ELF) - targetm.asm_out.named_section (sect->named.name, - sect->named.common.flags - | SECTION_LINKONCE, -- DECL_NAME (decl)); -+ decl); - in_section = sect; - #else - /* Neither OBJECT_FORMAT_PE, nor OBJECT_FORMAT_COFF is set here. -@@ -8492,18 +8488,18 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) - { - char *name; - -- if (TREE_CODE (DECL_NAME (decl)) == IDENTIFIER_NODE) -+ if (TREE_CODE (decl) == IDENTIFIER_NODE) - name = ACONCAT ((sect->named.name, "$", -- IDENTIFIER_POINTER (DECL_NAME (decl)), NULL)); -+ IDENTIFIER_POINTER (decl), NULL)); - else - name = ACONCAT ((sect->named.name, "$", -- IDENTIFIER_POINTER (DECL_COMDAT_GROUP (DECL_NAME (decl))), -+ IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)), - NULL)); - - targetm.asm_out.named_section (name, - sect->named.common.flags - | SECTION_LINKONCE, -- DECL_NAME (decl)); -+ decl); - in_section = sect; - } - else -@@ -8511,4 +8507,15 @@ handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) - #endif - } - -+/* This function ensures that vtable_map variables are not only -+ in the comdat section, but that each variable has its own unique -+ comdat name. Without this the variables end up in the same section -+ with a single comdat name. 
*/ -+ -+static void -+handle_vtv_comdat_section (section *sect, const_tree decl ATTRIBUTE_UNUSED) -+{ -+ switch_to_comdat_section(sect, DECL_NAME (decl)); -+} -+ - #include "gt-varasm.h" -diff --git a/gcc/varasm.h b/gcc/varasm.h -index d5d8c4e55..8ba8374e7 100644 ---- a/gcc/varasm.h -+++ b/gcc/varasm.h -@@ -79,4 +79,6 @@ extern rtx assemble_static_space (unsigned HOST_WIDE_INT); - - extern rtx assemble_trampoline_template (void); - -+extern void switch_to_comdat_section (section *, tree); -+ - #endif // GCC_VARASM_H --- -2.33.0 - diff --git a/0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch b/0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch deleted file mode 100644 index df79d84..0000000 --- a/0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch +++ /dev/null @@ -1,53 +0,0 @@ -From a3b4a0ac472415a52ce836e8997f7a69a06fad33 Mon Sep 17 00:00:00 2001 -From: Michael Collison -Date: Sat, 6 May 2023 12:37:50 -0600 -Subject: [PATCH 149/157] [Backport][SME] RISC-V: autovec: Verify that - GET_MODE_NUNITS is a multiple of 2. - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9 - -While working on autovectorizing for the RISCV port I encountered an issue -where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is a -evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode), -where GET_MODE_NUNITS is equal to one. - -Tested on RISCV and x86_64-linux-gnu. Okay? - -gcc/ - * tree-vect-slp.cc (can_duplicate_and_interleave_p): - Check that GET_MODE_NUNITS is a multiple of 2. 
---- - gcc/tree-vect-slp.cc | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc -index 6cbf8085f..d02f0ce37 100644 ---- a/gcc/tree-vect-slp.cc -+++ b/gcc/tree-vect-slp.cc -@@ -401,10 +401,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, - (GET_MODE_BITSIZE (int_mode), 1); - tree vector_type - = get_vectype_for_scalar_type (vinfo, int_type, count); -+ poly_int64 half_nelts; - if (vector_type - && VECTOR_MODE_P (TYPE_MODE (vector_type)) - && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), -- GET_MODE_SIZE (base_vector_mode))) -+ GET_MODE_SIZE (base_vector_mode)) -+ && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)), -+ 2, &half_nelts)) - { - /* Try fusing consecutive sequences of COUNT / NVECTORS elements - together into elements of type INT_TYPE and using the result -@@ -412,7 +415,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, - poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); - vec_perm_builder sel1 (nelts, 2, 3); - vec_perm_builder sel2 (nelts, 2, 3); -- poly_int64 half_nelts = exact_div (nelts, 2); -+ - for (unsigned int i = 0; i < 3; ++i) - { - sel1.quick_push (i); --- -2.33.0 - diff --git a/0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch b/0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch deleted file mode 100644 index 293df25..0000000 --- a/0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch +++ /dev/null @@ -1,42 +0,0 @@ -From b0ca9a6eb1406a60eec566cf302790bee89879af Mon Sep 17 00:00:00 2001 -From: Richard Biener -Date: Tue, 18 Apr 2023 16:58:26 +0200 -Subject: [PATCH 150/157] [Backport][SME] Add operator* to gimple_stmt_iterator - and gphi_iterator - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44 - -This allows STL style iterator dereference. It's the same -as gsi_stmt () or .phi (). 
- - * gimple-iterator.h (gimple_stmt_iterator::operator*): Add. - (gphi_iterator::operator*): Likewise. ---- - gcc/gimple-iterator.h | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h -index 216ebee24..5d281e4f7 100644 ---- a/gcc/gimple-iterator.h -+++ b/gcc/gimple-iterator.h -@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see - - struct gimple_stmt_iterator - { -+ gimple *operator * () const { return ptr; } -+ - /* Sequence node holding the current statement. */ - gimple_seq_node ptr; - -@@ -38,6 +40,8 @@ struct gimple_stmt_iterator - /* Iterator over GIMPLE_PHI statements. */ - struct gphi_iterator : public gimple_stmt_iterator - { -+ gphi *operator * () const { return as_a (ptr); } -+ - gphi *phi () const - { - return as_a (ptr); --- -2.33.0 - diff --git a/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch b/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch deleted file mode 100644 index 16a75f8..0000000 --- a/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001 -From: Richard Biener -Date: Fri, 10 Nov 2023 12:39:11 +0100 -Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and - loop mask/len - -Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169 - -The following fixes the issue that when SLP stmts are internal defs -but appear invariant because they end up only using invariant defs -then they get scheduled outside of the loop. This nice optimization -breaks down when loop masks or lens are applied since those are not -explicitly tracked as dependences. The following makes sure to never -schedule internal defs outside of the vectorized loop when the -loop uses masks/lens. 
- - PR tree-optimization/110221 - * tree-vect-slp.cc (vect_schedule_slp_node): When loop - masking / len is applied make sure to not schedule - intenal defs outside of the loop. - - * gfortran.dg/pr110221.f: New testcase. ---- - gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++ - gcc/tree-vect-slp.cc | 10 ++++++++++ - 2 files changed, 27 insertions(+) - create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f - -diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f -new file mode 100644 -index 000000000..8b5738431 ---- /dev/null -+++ b/gcc/testsuite/gfortran.dg/pr110221.f -@@ -0,0 +1,17 @@ -+C PR middle-end/68146 -+C { dg-do compile } -+C { dg-options "-O2 -w" } -+C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } } -+ SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY) -+ IMPLICIT DOUBLE PRECISION (A,B,G,O-Y) -+ IMPLICIT COMPLEX*16 (C,Z) -+ DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*) -+ N=INT(V) -+ CALL GAMMA2(VG,GA) -+ DO 65 K=1,N -+ CBY(K)=CYY -+65 CONTINUE -+ CDJ(0)=V0/Z*CBJ(0)-CBJ(1) -+ DO 70 K=1,N -+70 CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1) -+ END -diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc -index d02f0ce37..e3e246977 100644 ---- a/gcc/tree-vect-slp.cc -+++ b/gcc/tree-vect-slp.cc -@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo, - /* Emit other stmts after the children vectorized defs which is - earliest possible. */ - gimple *last_stmt = NULL; -+ if (auto loop_vinfo = dyn_cast (vinfo)) -+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) -+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)) -+ { -+ /* But avoid scheduling internal defs outside of the loop when -+ we might have only implicitly tracked loop mask/len defs. 
*/ -+ gimple_stmt_iterator si -+ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header); -+ last_stmt = *si; -+ } - bool seen_vector_def = false; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) --- -2.33.0 - diff --git a/0251-SME-Adapt-some-testsuites.patch b/0251-SME-Adapt-some-testsuites.patch deleted file mode 100644 index 1653f53..0000000 --- a/0251-SME-Adapt-some-testsuites.patch +++ /dev/null @@ -1,116 +0,0 @@ -From b60c29e6658c8620f1116ce5a38a6eb823af64e6 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Thu, 7 Mar 2024 10:22:39 +0800 -Subject: [PATCH 152/157] [SME] Adapt some testsuites - -gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp: - GCC 12.3.0 do not support -std=c23 and -std=gnu23 - -gcc.target/aarch64/sme/streaming_mode_2.c: - It's a warning in GCC 12.3.0 - -gcc.dg/c2x-attr-syntax-6.c: -gcc.dg/c2x-attr-syntax-7.c: - GCC 12.3.0 do not support C2x (...) function prototypes and - C2x noreturn attribute - -gcc.target/aarch64/sme/za_state_4.c: - Seems need a ldp/stp optimization, not a functionality issue ---- - gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c | 2 -- - gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c | 2 -- - .../gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp | 2 -- - .../gcc.target/aarch64/sme/streaming_mode_2.c | 12 ++++++------ - gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c | 1 + - 5 files changed, 7 insertions(+), 12 deletions(-) - -diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c -index 9e5f65ce4..2385b25fe 100644 ---- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c -+++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-6.c -@@ -15,13 +15,11 @@ typedef int [[__extension__ gnu FOO vector_size (4)]] g5; - typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6; - typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7; - typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8; --typedef int [[__extension__ gnu :: vector_size (sizeof 
(void (*)(...)))]] g10; - typedef int [[__extension__]] g11; - typedef int [[__extension__,]] g12; - typedef int [[__extension__, ,,,, ,, ,]] g13; - [[__extension__ deprecated]] int g14 (); - [[__extension__ nodiscard]] int g15 (); --[[__extension__ noreturn]] void g16 (); - - int - cases (int x) -diff --git a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c -index 702f733b1..5bbdba665 100644 ---- a/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c -+++ b/gcc/testsuite/gcc.dg/c2x-attr-syntax-7.c -@@ -15,13 +15,11 @@ typedef int [[__extension__ gnu FOO vector_size (4)]] g5; - typedef int [[__extension__ gnu BAR BAR vector_size (4)]] g6; - typedef int [[__extension__ gnu :/**/: vector_size (4)]] g7; - typedef int [[__extension__ gnu JOIN(:,:) vector_size (4)]] g8; --typedef int [[__extension__ gnu :: vector_size (sizeof (void (*)(...)))]] g10; - typedef int [[__extension__]] g11; - typedef int [[__extension__,]] g12; - typedef int [[__extension__, ,,,, ,, ,]] g13; - [[__extension__ deprecated]] int g14 (); - [[__extension__ nodiscard]] int g15 (); --[[__extension__ noreturn]] void g16 (); - - int - cases (int x) -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp -index e2d002f26..a0a4fe4f7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp -+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme-acle-asm.exp -@@ -52,9 +52,7 @@ set-torture-options { - "-std=c90 -O0 -g" - "-std=c99 -Og -g" - "-std=c11 -Os -g" -- "-std=c23 -O2 -fno-schedule-insns -fno-schedule-insns2 -DCHECK_ASM --save-temps" - "-std=gnu90 -O3 -g" -- "-std=gnu23 -Ofast -g" - } { - "-DTEST_FULL" - "-DTEST_OVERLOADS" -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c -index e8be0f821..1e328c817 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c -+++ 
b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_2.c -@@ -12,14 +12,14 @@ void - f () - { - sc_fn_ptr = sc_fn; -- sc_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } -- sc_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } -+ sc_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } -+ sc_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } - -- s_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } -+ s_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } - s_fn_ptr = s_fn; -- s_fn_ptr = ns_fn; // { dg-error "incompatible pointer type" } -+ s_fn_ptr = ns_fn; // { dg-warning "incompatible pointer type" } - -- ns_fn_ptr = sc_fn; // { dg-error "incompatible pointer type" } -- ns_fn_ptr = s_fn; // { dg-error "incompatible pointer type" } -+ ns_fn_ptr = sc_fn; // { dg-warning "incompatible pointer type" } -+ ns_fn_ptr = s_fn; // { dg-warning "incompatible pointer type" } - ns_fn_ptr = ns_fn; - } -diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c -index cec0abf0e..a764a7c89 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c -@@ -105,6 +105,7 @@ __arm_new("za") void test5() - ** mul (x[0-9]+), \1, \1 - ** sub sp, sp, \2 - ** mov (x[0-9]+), sp -+** add [^\n]+ - ** stp \3, \1, \[x29, #?16\] - ** add (x[0-9]+), x29, #?16 - ** msr tpidr2_el0, \4 --- -2.33.0 - diff --git a/0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch b/0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch deleted file mode 100644 index 6018ee7..0000000 --- a/0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch +++ /dev/null @@ -1,43 +0,0 @@ -From ce53aec1f43f79c093db662a2e8e3062462757b4 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Thu, 22 Aug 2024 16:35:28 +0800 -Subject: [PATCH 153/157] [SME] Fix error by backported patches and IPA - prefetch - -Fix -gtype-desc.cc: In function 
'void gt_pch_p_30vec_cgraph_node__va_gc_atomic_(void*, void*, gt_pointer_operator, void*)': -gtype-desc.cc:11032:35: error: call of overloaded 'gt_pch_nx(vec*, void (*&)(void*, void*, void*), void*&)' is ambiguous -11032 | gt_pch_nx (&((*x)), op, cookie); - | ^ -In file included from ../../gcc/hash-table.h:248, - from ../../gcc/coretypes.h:486, - from gtype-desc.cc:23: -../../gcc/vec.h:1395:1: note: candidate: 'void gt_pch_nx(vec*, gt_pointer_operator, void*) [with T = cgraph_node; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]' - 1395 | gt_pch_nx (vec *v, gt_pointer_operator op, void *cookie) - | ^~~~~~~~~ -../../gcc/vec.h:1403:1: note: candidate: 'void gt_pch_nx(vec*, gt_pointer_operator, void*) [with T = cgraph_node*; A = va_gc_atomic; gt_pointer_operator = void (*)(void*, void*, void*)]' - 1403 | gt_pch_nx (vec *v, gt_pointer_operator op, void *cookie) - | ^~~~~~~~~ -../../gcc/vec.h:1412:1: note: candidate: 'void gt_pch_nx(vec*, gt_pointer_operator, void*) [with T = cgraph_node*; gt_pointer_operator = void (*)(void*, void*, void*)]' - 1412 | gt_pch_nx (vec *, gt_pointer_operator, void *) - | ^~~~~~~~~ ---- - gcc/cgraph.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/cgraph.h b/gcc/cgraph.h -index b84ff2f98..2332539e5 100644 ---- a/gcc/cgraph.h -+++ b/gcc/cgraph.h -@@ -1660,7 +1660,7 @@ public: - /* ECF flags determined from the caller. */ - int ecf_flags; - /* Vector of potential call targets determined by analysis. */ -- vec *targets; -+ vec *targets; - - /* Number of speculative call targets, it's less than GCOV_TOPN_VALUES. 
*/ - unsigned num_speculative_call_targets : 16; --- -2.33.0 - diff --git a/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch b/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch deleted file mode 100644 index 24dacd1..0000000 --- a/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 239f0637307ff2f6afb1473e99d0bb0eaf8946b2 Mon Sep 17 00:00:00 2001 -From: xiezhiheng -Date: Fri, 23 Aug 2024 15:37:17 +0800 -Subject: [PATCH 154/157] [SME] Recover hip09 and hip11 in aarch64-cores.def - ---- - gcc/config/aarch64/aarch64-cores.def | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index f069c81cf..3337fd1a0 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) - - /* HiSilicon ('H') cores. */ - AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) -+AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0) - - /* ARMv8.3-A Architecture Processors. 
*/ - -@@ -171,6 +172,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, - AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1) - - AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1) -+AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22, -1) - - AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) - AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) --- -2.33.0 - diff --git a/0286-Try-to-use-AI-model-to-guide-optimization.patch b/0286-Try-to-use-AI-model-to-guide-optimization.patch index a697dcc..82cb72e 100644 --- a/0286-Try-to-use-AI-model-to-guide-optimization.patch +++ b/0286-Try-to-use-AI-model-to-guide-optimization.patch @@ -175,8 +175,8 @@ index 309ecc3d9..ad853af9a 100644 and which when vectorized would operate on vector type VECTYPE. Add the cost of any embedded operations. 
*/ @@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts) - && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) - opts->x_flag_prefetch_loop_arrays = 1; + if (opts->x_aarch64_tune_string == NULL) + opts->x_aarch64_tune_string = selected_tune->name; + reset_machine_option (opts); aarch64_override_options_after_change_1 (opts); diff --git a/0303-Add-generation-of-stream-in-functions-for-pre-versio.patch b/0303-Add-generation-of-stream-in-functions-for-pre-versio.patch index dfa965b..0df59b9 100644 --- a/0303-Add-generation-of-stream-in-functions-for-pre-versio.patch +++ b/0303-Add-generation-of-stream-in-functions-for-pre-versio.patch @@ -240,13 +240,13 @@ index 76e9b3cb9..7c012dd4e 100644 + print " bp_unpack_string (data_in, bp);" +} +if ("x_aarch64_asm_isa_flags" in var_target_val_set) { -+ print " ptr->x_aarch64_asm_isa_flags = (aarch64_feature_flags) bp_unpack_value (bp, 64);" ++ print " ptr->x_aarch64_asm_isa_flags = bp_unpack_value (bp, 64);" +} +else { + print " bp_unpack_value (bp, 64);" +} +if ("x_aarch64_isa_flags" in var_target_val_set) { -+ print " ptr->x_aarch64_isa_flags = (aarch64_feature_flags) bp_unpack_value (bp, 64);" ++ print " ptr->x_aarch64_isa_flags = bp_unpack_value (bp, 64);" +} +else { + print " bp_unpack_value (bp, 64);" diff --git a/0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch b/0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch index cdd28b1..32e2ca3 100644 --- a/0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch +++ b/0304-Add-multi-version-lto-symbol-parse-cross-lto-units-i.patch @@ -88,15 +88,15 @@ index 025a3c478..f095f17aa 100644 + struct cl_target_option *ptr2) +{ + const char *const cpu1 -+ = aarch64_get_tune_cpu (ptr1->x_selected_tune)->name; -+ const struct processor *arch1 = aarch64_get_arch (ptr1->x_selected_arch); ++ = aarch64_get_tune_cpu (ptr1->x_explicit_tune_core)->name; ++ const struct processor *arch1 = aarch64_get_arch 
(ptr1->x_explicit_arch); + std::string extension1 + = aarch64_get_extension_string_for_isa_flags (ptr1->x_aarch64_isa_flags, + arch1->flags); + + const char *const cpu2 -+ = aarch64_get_tune_cpu (ptr2->x_selected_tune)->name; -+ const struct processor *arch2 = aarch64_get_arch (ptr2->x_selected_arch); ++ = aarch64_get_tune_cpu (ptr2->x_explicit_tune_core)->name; ++ const struct processor *arch2 = aarch64_get_arch (ptr2->x_explicit_arch); + std::string extension2 + = aarch64_get_extension_string_for_isa_flags (ptr2->x_aarch64_isa_flags, + arch2->flags); diff --git a/0326-BUGFIX-Fix-build-error-on-risv_64.patch b/0326-BUGFIX-Fix-build-error-on-risv_64.patch index 6d91eaa..01baebf 100644 --- a/0326-BUGFIX-Fix-build-error-on-risv_64.patch +++ b/0326-BUGFIX-Fix-build-error-on-risv_64.patch @@ -67,401 +67,401 @@ index 000000000..c3d99dd85 --- /dev/null +++ b/gcc/ai-optimizer.cc @@ -0,0 +1,395 @@ -+/* Lightweight AI Inference Framework. -+ Copyright (C) 2024-2024 Free Software Foundation, Inc. -+This file is part of GCC. -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. 
*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "config.h" -+#include "system.h" -+#include "ai4c-infer.h" -+ -+#define M_OPTION_SIZE 11 -+#define M_MODE_SIZE 6 -+#define NATIVE_TUNE_SIZE 128 -+#define CATS_STRINGS_ROW 34 -+#define CATS_STRINGS_COL 65 -+#define CATS_STRINGS1_ROW 10 -+#define CATS_STRINGS1_COL 65 -+#define OFFSET_ROW 6 -+#define SCALE_ROW 6 -+#define UNITY_ROW 1 -+#define COEFFICIENT_ROW 356 -+#define COEFFICIENT_COL 10 -+#define COEFFICIENT1_ROW 10 -+#define COEFFICIENT1_COL 1 -+#define INTERCEPTS_ROW 10 -+#define INTERCEPTS1_ROW 1 -+ -+/* Intermediate computation results from the ONNX model. */ -+static char cats_strings[CATS_STRINGS_ROW][CATS_STRINGS_COL]; -+static char cats_strings1[CATS_STRINGS1_ROW][CATS_STRINGS1_COL]; -+static float offset[OFFSET_ROW]; -+static float scale[SCALE_ROW]; -+static float unity[UNITY_ROW]; -+static float coefficient[COEFFICIENT_ROW][COEFFICIENT_COL]; -+static float coefficient1[COEFFICIENT1_ROW][COEFFICIENT1_COL]; -+static float intercepts[INTERCEPTS_ROW]; -+static float intercepts1[INTERCEPTS1_ROW]; -+ -+/* Return an integer that represents the comparison result of the -+ two strings. */ -+ -+static int -+compare_strings (const void *a, const void *b) -+{ -+ const char *str_a = *(const char **)a; -+ const char *str_b = *(const char **)b; -+ -+ int len = strlen (str_a) < strlen (str_b) ? strlen (str_a) : strlen (str_b); -+ for (int i = 0; i < len; i++) -+ { -+ char c1 = str_a[i]; -+ char c2 = str_b[i]; -+ if (ISUPPER (c1) && !ISUPPER (c2)) -+ return 0; -+ else if (!ISUPPER (c1) && ISUPPER (c2)) -+ return 1; -+ else if (c1 != c2) -+ return c1 < c2; -+ } -+ return strlen (str_a) > strlen (str_b); -+} -+ -+/* Return the substring before the first underscore ('_') in the input -+ string. 
*/ -+ -+static void -+truncate_prefix (const char *str, char *result) -+{ -+ const char *underscore_pos = strchr (str, '_'); -+ if (underscore_pos == NULL) -+ { -+ strcpy (result, str); -+ return; -+ } -+ -+ size_t len = underscore_pos - str; -+ strncpy (result, str, len + 1); -+ result[len + 1] = '\0'; -+} -+ -+ -+static void -+preprocess (int argc1, const char **argv1, const char *mops, -+ int argc2, int64_t *argv2, char (*in_options)[1024], -+ int64_t *in_modes) -+{ -+ strcpy (in_options[0], mops); -+ -+ const char *output_option = "-o"; -+ const char *marco_prefix = "-D"; -+ const char *needle = "--param"; -+ const char *flag_prefix = "-"; -+ const char *default_option = "-default-option"; -+ const int default_int_val = 0; -+ int m_size = 0; -+ for (int i = 0; i < argc1; i++) -+ { -+ if (strncmp (argv1[i], marco_prefix, 2) == 0) -+ m_size ++; -+ } -+ -+ char *m_options[m_size]; -+ char output_file[1024]; -+ int m_index = 0; -+ for (int i = 0; i < argc1; i++) -+ { -+ if (strncmp (argv1[i], marco_prefix, 2) == 0) -+ { -+ m_options[m_index] = (char *)argv1[i]; -+ m_index ++; -+ } -+ if (strcmp (argv1[i], output_option) == 0) -+ truncate_prefix (argv1[i + 1], output_file); -+ } -+ -+ strcpy (in_options[1], output_file); -+ int in_options_size = 2; -+ qsort (m_options, m_size, sizeof (m_options[0]), compare_strings); -+ for (int i = 0; i < m_size && in_options_size < M_OPTION_SIZE; i++) -+ { -+ strcpy (in_options[in_options_size], m_options[i]); -+ in_options_size ++; -+ } -+ -+ for (int i = 0; i < argc1 && in_options_size < M_OPTION_SIZE; i++) -+ { -+ if (strncmp (argv1[i], marco_prefix, 2) != 0 -+ && strcmp (argv1[i], output_option) != 0 -+ && strncmp (argv1[i], needle, 7) != 0 -+ && strncmp (argv1[i], flag_prefix, 1) == 0) -+ { -+ strcpy (in_options[in_options_size], argv1[i]); -+ in_options_size ++; -+ } -+ } -+ -+ while (in_options_size < M_OPTION_SIZE) -+ { -+ strcpy (in_options[in_options_size], default_option); -+ in_options_size ++; -+ } -+ -+ /* Use sha256 
to encrypt the input. */ -+ char hash[65]; -+ char input[64]; -+ for (int i = 0; i < M_OPTION_SIZE; i++) -+ { -+ execute_sha256 (in_options[i], hash, sizeof (hash)); -+ strcpy (in_options[i], hash); -+ } -+ -+ for (int i = 0; i < argc2 && i < M_MODE_SIZE; i++) -+ { -+ if (i < argc2) -+ in_modes[i] = argv2[i]; -+ else -+ in_modes[i] = default_int_val; -+ } -+} -+ -+/* To read model parameter information from optimizer.fdata and store it into -+ the appropriate arrays. */ -+ -+static void -+fill_node (const char *file_name) -+{ -+ FILE *file = fopen (file_name, "rb"); -+ -+ if (!file) -+ { -+ perror ("Can not open file."); -+ return; -+ } -+ -+ /* Read cats_strings from optimizer.fdata. */ -+ char hex_string[2]; -+ for (int i = 0; i < CATS_STRINGS_ROW; i++) -+ { -+ for (int j = 0; j < CATS_STRINGS_COL - 1; j++) -+ { -+ if (fscanf (file, "%2s", hex_string) != 1) -+ { -+ perror ("Can not read cats_strings from optimizer.fdata."); -+ return; -+ } -+ cats_strings[i][j] = (unsigned char) strtol(hex_string, NULL, 16); -+ } -+ cats_strings[i][CATS_STRINGS_COL - 1] = '\0'; -+ } -+ -+ /* Read cats_strings1 from optimizer.fdata. */ -+ for (int i = 0; i < CATS_STRINGS1_ROW; i++) -+ { -+ for (int j = 0; j < CATS_STRINGS1_COL - 1; j++) -+ { -+ if (fscanf (file, "%2s", hex_string) != 1) -+ { -+ perror ("Can not read cats_strings1 from optimizer.fdata."); -+ return; -+ } -+ cats_strings1[i][j] = (unsigned char) strtol(hex_string, NULL, 16); -+ } -+ cats_strings1[i][CATS_STRINGS1_COL - 1] = '\0'; -+ } -+ -+ /* Read offset from optimizer.fdata. */ -+ for (int i = 0; i < OFFSET_ROW; i++) -+ { -+ float result = read_float_from_file (file); -+ offset[i] = result; -+ } -+ -+ -+ /* Read scale from optimizer.fdata. */ -+ for (int i = 0; i < SCALE_ROW; i++) -+ { -+ float result = read_float_from_file (file); -+ scale[i] = result; -+ } -+ -+ /* Read unity from optimizer.fdata. 
*/ -+ for (int i = 0; i < UNITY_ROW; i++) -+ { -+ float result = read_float_from_file (file); -+ unity[i] = result; -+ } -+ -+ /* Read coefficient from optimizer.fdata. */ -+ for (int i = 0; i < COEFFICIENT_ROW; i++) -+ for (int j = 0; j < COEFFICIENT_COL; j++) -+ { -+ float result = read_float_from_file (file); -+ coefficient[i][j] = result; -+ } -+ -+ /* Read coefficient1 from optimizer.fdata. */ -+ for (int i = 0; i < COEFFICIENT1_ROW; i++) -+ for (int j = 0; j < COEFFICIENT1_COL; j++) -+ { -+ float result = read_float_from_file (file); -+ coefficient1[i][j] = result; -+ } -+ -+ /* Read intercepts from optimizer.fdata. */ -+ for (int i = 0; i < INTERCEPTS_ROW; i++) -+ { -+ float result = read_float_from_file (file); -+ intercepts[i] = result; -+ } -+ -+ /* Read intercepts1 from optimizer.fdata. */ -+ for (int i = 0; i < INTERCEPTS1_ROW; i++) -+ { -+ float result = read_float_from_file (file); -+ intercepts1[i] = result; -+ } -+ -+ fclose (file); -+ return; -+} -+ -+/* The process of model inference. */ -+ -+static int -+graph_infer (int argc1, const char **argv1, const char *mops, -+ int argc2, int64_t *argv2) -+{ -+ char *gcc_exec_prefix = getenv ("ONNX_FDATA_PATH"); -+ if (gcc_exec_prefix == NULL) -+ return 0; -+ char native_file[512]; -+ -+ if (gcc_exec_prefix) -+ { -+ const char *onnx_fdata = "optimizer.fdata"; -+ strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1); -+ native_file[sizeof (native_file) - 1] = '\0'; -+ char *last_slash = strrchr (native_file, '/'); -+ if (last_slash) -+ strcpy (last_slash + 1, onnx_fdata); -+ } -+ -+ if (access (native_file, F_OK) == 0) -+ fill_node (native_file); -+ else -+ return 0; -+ -+ static int64_t in_modes[M_MODE_SIZE]; -+ static char in_options[M_OPTION_SIZE][1024]; -+ -+ preprocess (argc1, argv1, mops, argc2, argv2, in_options, in_modes); -+ -+ /* concat_result and encoder_out are intermediate computation results from -+ the ONNX model. 
concat_result is a 1 × 18 matrix, and encoder_out is a -+ 1 × 12 matrix. */ -+ -+ const int concat_out_size = 350; -+ float concat_result[concat_out_size]; -+ const int encoder_out_size = 34; -+ const int encoder_last_size = 10; -+ int concat_size = 0; -+ const int size = encoder_out_size; -+ -+ for (int i = 1; i < M_OPTION_SIZE; i++) -+ { -+ float encoder_out[size]; -+ one_hot_encoder (in_options[i], cats_strings, encoder_out, size); -+ line_concat (encoder_out, size, concat_result, concat_size); -+ concat_size += size; -+ } -+ -+ float encoder_out2[encoder_last_size]; -+ one_hot_encoder (in_options[0], cats_strings1, encoder_out2, -+ encoder_last_size); -+ line_concat (encoder_out2, encoder_last_size, concat_result, concat_size); -+ concat_size += encoder_last_size; -+ -+ float variable[M_MODE_SIZE]; -+ imputer (in_modes, M_MODE_SIZE, variable); -+ float variable1[M_MODE_SIZE]; -+ scaler (variable, offset, scale, M_MODE_SIZE, variable1); -+ -+ float transformed_column[concat_out_size + M_MODE_SIZE]; -+ /* line_concat is used to stro*/ -+ line_concat (variable1, M_MODE_SIZE, transformed_column, 0); -+ line_concat (concat_result, concat_out_size, transformed_column, 6); -+ -+ /* This requires performing matrix multiplication between a 1 × 356 matrix -+ and an 356 × 10 matrix */ -+ -+ const int m = 1, k = 356, n = 10; -+ float mul_result[n]; -+ matmul (transformed_column, coefficient[0], m, k, n, mul_result); -+ -+ float add_result[n]; -+ add (mul_result, intercepts, n, add_result); -+ -+ float next_activations[n]; -+ relu (add_result, n, next_activations); -+ -+ /* This requires performing matrix multiplication between a 1 × 10 matrix -+ and an 10 × 1 matrix */ -+ -+ const int m2 = 1, k2 = 10, n2 = 1; -+ float mul_result1[n2]; -+ matmul (next_activations, coefficient1[0], m2, k2, n2, mul_result1); -+ -+ float add_result1[n2]; -+ add (mul_result1, intercepts1, n2, add_result1); -+ -+ float out_activations_result[n2]; -+ sigmoid (add_result1, n2, 
out_activations_result); -+ -+ float negative_class_proba[n2]; -+ sub (unity, out_activations_result, n2, negative_class_proba); -+ const int prob_size = n2 + n2; -+ float probabilities[prob_size]; -+ line_concat (negative_class_proba, n2, probabilities, 0); -+ line_concat (out_activations_result, n2, probabilities, n2); -+ -+ int argmax_output = argmax (probabilities, prob_size); -+ return argmax_output; -+} -+ -+int -+get_optimize_decision_from_optimizer (int argc, const char **argv, -+ const char *mops, int argc2, -+ int64_t *argv2) -+{ -+ int model_pred = graph_infer (argc, argv, mops, argc2, argv2); -+ if (model_pred == 1) -+ { -+ putenv ("AI_INFER_LEVEL=1"); -+ } -+ return model_pred; -+} ++/* Lightweight AI Inference Framework. ++ Copyright (C) 2024-2024 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "config.h" ++#include "system.h" ++#include "ai4c-infer.h" ++ ++#define M_OPTION_SIZE 11 ++#define M_MODE_SIZE 6 ++#define NATIVE_TUNE_SIZE 128 ++#define CATS_STRINGS_ROW 34 ++#define CATS_STRINGS_COL 65 ++#define CATS_STRINGS1_ROW 10 ++#define CATS_STRINGS1_COL 65 ++#define OFFSET_ROW 6 ++#define SCALE_ROW 6 ++#define UNITY_ROW 1 ++#define COEFFICIENT_ROW 356 ++#define COEFFICIENT_COL 10 ++#define COEFFICIENT1_ROW 10 ++#define COEFFICIENT1_COL 1 ++#define INTERCEPTS_ROW 10 ++#define INTERCEPTS1_ROW 1 ++ ++/* Intermediate computation results from the ONNX model. */ ++static char cats_strings[CATS_STRINGS_ROW][CATS_STRINGS_COL]; ++static char cats_strings1[CATS_STRINGS1_ROW][CATS_STRINGS1_COL]; ++static float offset[OFFSET_ROW]; ++static float scale[SCALE_ROW]; ++static float unity[UNITY_ROW]; ++static float coefficient[COEFFICIENT_ROW][COEFFICIENT_COL]; ++static float coefficient1[COEFFICIENT1_ROW][COEFFICIENT1_COL]; ++static float intercepts[INTERCEPTS_ROW]; ++static float intercepts1[INTERCEPTS1_ROW]; ++ ++/* Return an integer that represents the comparison result of the ++ two strings. */ ++ ++static int ++compare_strings (const void *a, const void *b) ++{ ++ const char *str_a = *(const char **)a; ++ const char *str_b = *(const char **)b; ++ ++ int len = strlen (str_a) < strlen (str_b) ? strlen (str_a) : strlen (str_b); ++ for (int i = 0; i < len; i++) ++ { ++ char c1 = str_a[i]; ++ char c2 = str_b[i]; ++ if (ISUPPER (c1) && !ISUPPER (c2)) ++ return 0; ++ else if (!ISUPPER (c1) && ISUPPER (c2)) ++ return 1; ++ else if (c1 != c2) ++ return c1 < c2; ++ } ++ return strlen (str_a) > strlen (str_b); ++} ++ ++/* Return the substring before the first underscore ('_') in the input ++ string. 
*/ ++ ++static void ++truncate_prefix (const char *str, char *result) ++{ ++ const char *underscore_pos = strchr (str, '_'); ++ if (underscore_pos == NULL) ++ { ++ strcpy (result, str); ++ return; ++ } ++ ++ size_t len = underscore_pos - str; ++ strncpy (result, str, len + 1); ++ result[len + 1] = '\0'; ++} ++ ++ ++static void ++preprocess (int argc1, const char **argv1, const char *mops, ++ int argc2, int64_t *argv2, char (*in_options)[1024], ++ int64_t *in_modes) ++{ ++ strcpy (in_options[0], mops); ++ ++ const char *output_option = "-o"; ++ const char *marco_prefix = "-D"; ++ const char *needle = "--param"; ++ const char *flag_prefix = "-"; ++ const char *default_option = "-default-option"; ++ const int default_int_val = 0; ++ int m_size = 0; ++ for (int i = 0; i < argc1; i++) ++ { ++ if (strncmp (argv1[i], marco_prefix, 2) == 0) ++ m_size ++; ++ } ++ ++ char *m_options[m_size]; ++ char output_file[1024]; ++ int m_index = 0; ++ for (int i = 0; i < argc1; i++) ++ { ++ if (strncmp (argv1[i], marco_prefix, 2) == 0) ++ { ++ m_options[m_index] = (char *)argv1[i]; ++ m_index ++; ++ } ++ if (strcmp (argv1[i], output_option) == 0) ++ truncate_prefix (argv1[i + 1], output_file); ++ } ++ ++ strcpy (in_options[1], output_file); ++ int in_options_size = 2; ++ qsort (m_options, m_size, sizeof (m_options[0]), compare_strings); ++ for (int i = 0; i < m_size && in_options_size < M_OPTION_SIZE; i++) ++ { ++ strcpy (in_options[in_options_size], m_options[i]); ++ in_options_size ++; ++ } ++ ++ for (int i = 0; i < argc1 && in_options_size < M_OPTION_SIZE; i++) ++ { ++ if (strncmp (argv1[i], marco_prefix, 2) != 0 ++ && strcmp (argv1[i], output_option) != 0 ++ && strncmp (argv1[i], needle, 7) != 0 ++ && strncmp (argv1[i], flag_prefix, 1) == 0) ++ { ++ strcpy (in_options[in_options_size], argv1[i]); ++ in_options_size ++; ++ } ++ } ++ ++ while (in_options_size < M_OPTION_SIZE) ++ { ++ strcpy (in_options[in_options_size], default_option); ++ in_options_size ++; ++ } ++ ++ /* Use sha256 
to encrypt the input. */ ++ char hash[65]; ++ char input[64]; ++ for (int i = 0; i < M_OPTION_SIZE; i++) ++ { ++ execute_sha256 (in_options[i], hash, sizeof (hash)); ++ strcpy (in_options[i], hash); ++ } ++ ++ for (int i = 0; i < argc2 && i < M_MODE_SIZE; i++) ++ { ++ if (i < argc2) ++ in_modes[i] = argv2[i]; ++ else ++ in_modes[i] = default_int_val; ++ } ++} ++ ++/* To read model parameter information from optimizer.fdata and store it into ++ the appropriate arrays. */ ++ ++static void ++fill_node (const char *file_name) ++{ ++ FILE *file = fopen (file_name, "rb"); ++ ++ if (!file) ++ { ++ perror ("Can not open file."); ++ return; ++ } ++ ++ /* Read cats_strings from optimizer.fdata. */ ++ char hex_string[2]; ++ for (int i = 0; i < CATS_STRINGS_ROW; i++) ++ { ++ for (int j = 0; j < CATS_STRINGS_COL - 1; j++) ++ { ++ if (fscanf (file, "%2s", hex_string) != 1) ++ { ++ perror ("Can not read cats_strings from optimizer.fdata."); ++ return; ++ } ++ cats_strings[i][j] = (unsigned char) strtol(hex_string, NULL, 16); ++ } ++ cats_strings[i][CATS_STRINGS_COL - 1] = '\0'; ++ } ++ ++ /* Read cats_strings1 from optimizer.fdata. */ ++ for (int i = 0; i < CATS_STRINGS1_ROW; i++) ++ { ++ for (int j = 0; j < CATS_STRINGS1_COL - 1; j++) ++ { ++ if (fscanf (file, "%2s", hex_string) != 1) ++ { ++ perror ("Can not read cats_strings1 from optimizer.fdata."); ++ return; ++ } ++ cats_strings1[i][j] = (unsigned char) strtol(hex_string, NULL, 16); ++ } ++ cats_strings1[i][CATS_STRINGS1_COL - 1] = '\0'; ++ } ++ ++ /* Read offset from optimizer.fdata. */ ++ for (int i = 0; i < OFFSET_ROW; i++) ++ { ++ float result = read_float_from_file (file); ++ offset[i] = result; ++ } ++ ++ ++ /* Read scale from optimizer.fdata. */ ++ for (int i = 0; i < SCALE_ROW; i++) ++ { ++ float result = read_float_from_file (file); ++ scale[i] = result; ++ } ++ ++ /* Read unity from optimizer.fdata. 
*/ ++ for (int i = 0; i < UNITY_ROW; i++) ++ { ++ float result = read_float_from_file (file); ++ unity[i] = result; ++ } ++ ++ /* Read coefficient from optimizer.fdata. */ ++ for (int i = 0; i < COEFFICIENT_ROW; i++) ++ for (int j = 0; j < COEFFICIENT_COL; j++) ++ { ++ float result = read_float_from_file (file); ++ coefficient[i][j] = result; ++ } ++ ++ /* Read coefficient1 from optimizer.fdata. */ ++ for (int i = 0; i < COEFFICIENT1_ROW; i++) ++ for (int j = 0; j < COEFFICIENT1_COL; j++) ++ { ++ float result = read_float_from_file (file); ++ coefficient1[i][j] = result; ++ } ++ ++ /* Read intercepts from optimizer.fdata. */ ++ for (int i = 0; i < INTERCEPTS_ROW; i++) ++ { ++ float result = read_float_from_file (file); ++ intercepts[i] = result; ++ } ++ ++ /* Read intercepts1 from optimizer.fdata. */ ++ for (int i = 0; i < INTERCEPTS1_ROW; i++) ++ { ++ float result = read_float_from_file (file); ++ intercepts1[i] = result; ++ } ++ ++ fclose (file); ++ return; ++} ++ ++/* The process of model inference. */ ++ ++static int ++graph_infer (int argc1, const char **argv1, const char *mops, ++ int argc2, int64_t *argv2) ++{ ++ char *gcc_exec_prefix = getenv ("ONNX_FDATA_PATH"); ++ if (gcc_exec_prefix == NULL) ++ return 0; ++ char native_file[512]; ++ ++ if (gcc_exec_prefix) ++ { ++ const char *onnx_fdata = "optimizer.fdata"; ++ strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1); ++ native_file[sizeof (native_file) - 1] = '\0'; ++ char *last_slash = strrchr (native_file, '/'); ++ if (last_slash) ++ strcpy (last_slash + 1, onnx_fdata); ++ } ++ ++ if (access (native_file, F_OK) == 0) ++ fill_node (native_file); ++ else ++ return 0; ++ ++ static int64_t in_modes[M_MODE_SIZE]; ++ static char in_options[M_OPTION_SIZE][1024]; ++ ++ preprocess (argc1, argv1, mops, argc2, argv2, in_options, in_modes); ++ ++ /* concat_result and encoder_out are intermediate computation results from ++ the ONNX model. 
concat_result is a 1 × 18 matrix, and encoder_out is a ++ 1 × 12 matrix. */ ++ ++ const int concat_out_size = 350; ++ float concat_result[concat_out_size]; ++ const int encoder_out_size = 34; ++ const int encoder_last_size = 10; ++ int concat_size = 0; ++ const int size = encoder_out_size; ++ ++ for (int i = 1; i < M_OPTION_SIZE; i++) ++ { ++ float encoder_out[size]; ++ one_hot_encoder (in_options[i], cats_strings, encoder_out, size); ++ line_concat (encoder_out, size, concat_result, concat_size); ++ concat_size += size; ++ } ++ ++ float encoder_out2[encoder_last_size]; ++ one_hot_encoder (in_options[0], cats_strings1, encoder_out2, ++ encoder_last_size); ++ line_concat (encoder_out2, encoder_last_size, concat_result, concat_size); ++ concat_size += encoder_last_size; ++ ++ float variable[M_MODE_SIZE]; ++ imputer (in_modes, M_MODE_SIZE, variable); ++ float variable1[M_MODE_SIZE]; ++ scaler (variable, offset, scale, M_MODE_SIZE, variable1); ++ ++ float transformed_column[concat_out_size + M_MODE_SIZE]; ++ /* line_concat is used to stro*/ ++ line_concat (variable1, M_MODE_SIZE, transformed_column, 0); ++ line_concat (concat_result, concat_out_size, transformed_column, 6); ++ ++ /* This requires performing matrix multiplication between a 1 × 356 matrix ++ and an 356 × 10 matrix */ ++ ++ const int m = 1, k = 356, n = 10; ++ float mul_result[n]; ++ matmul (transformed_column, coefficient[0], m, k, n, mul_result); ++ ++ float add_result[n]; ++ add (mul_result, intercepts, n, add_result); ++ ++ float next_activations[n]; ++ relu (add_result, n, next_activations); ++ ++ /* This requires performing matrix multiplication between a 1 × 10 matrix ++ and an 10 × 1 matrix */ ++ ++ const int m2 = 1, k2 = 10, n2 = 1; ++ float mul_result1[n2]; ++ matmul (next_activations, coefficient1[0], m2, k2, n2, mul_result1); ++ ++ float add_result1[n2]; ++ add (mul_result1, intercepts1, n2, add_result1); ++ ++ float out_activations_result[n2]; ++ sigmoid (add_result1, n2, 
out_activations_result); ++ ++ float negative_class_proba[n2]; ++ sub (unity, out_activations_result, n2, negative_class_proba); ++ const int prob_size = n2 + n2; ++ float probabilities[prob_size]; ++ line_concat (negative_class_proba, n2, probabilities, 0); ++ line_concat (out_activations_result, n2, probabilities, n2); ++ ++ int argmax_output = argmax (probabilities, prob_size); ++ return argmax_output; ++} ++ ++int ++get_optimize_decision_from_optimizer (int argc, const char **argv, ++ const char *mops, int argc2, ++ int64_t *argv2) ++{ ++ int model_pred = graph_infer (argc, argv, mops, argc2, argv2); ++ if (model_pred == 1) ++ { ++ putenv ("AI_INFER_LEVEL=1"); ++ } ++ return model_pred; ++} diff --git a/gcc/ai4c-infer.cc b/gcc/ai4c-infer.cc index 99f7a6b45..42922e1ca 100644 --- a/gcc/ai4c-infer.cc @@ -730,9 +730,9 @@ index 2d2ac42c4..dd739288c 100644 static void @@ -119,6 +134,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) - cpp_opts->warn_variadic_macros = old_warn_variadic_macros; - cpp_opts->cpp_warn_c90_c99_compat = old_cpp_warn_c90_c99_compat; - } + builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8); + + builtin_define ("__GCC_ASM_FLAG_OUTPUTS__"); + reset_machine_option(&global_options); } @@ -783,9 +783,9 @@ index 829e0da8f..debb15522 100644 and which when vectorized would operate on vector type VECTYPE. Add the cost of any embedded operations. 
*/ @@ -20348,7 +20354,6 @@ aarch64_override_options_internal (struct gcc_options *opts) - && aarch64_tune_params.prefetch->default_opt_level >= 0 - && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) - opts->x_flag_prefetch_loop_arrays = 1; + opts->x_aarch64_cpu_string = selected_cpu->name; + if (opts->x_aarch64_tune_string == NULL) + opts->x_aarch64_tune_string = selected_tune->name; - reset_machine_option (opts); aarch64_override_options_after_change_1 (opts); diff --git a/gcc.spec b/gcc.spec index 1e43ba3..0df6221 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 59 +%global gcc_release 60 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -208,158 +208,29 @@ Patch99: 0099-Enable-Transposed-SLP.patch Patch100: 0100-Add-hip09-machine-discribtion.patch Patch101: 0101-Add-hip11-CPU-pipeline-scheduling.patch Patch102: 0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch -Patch103: 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch -Patch104: 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch -Patch105: 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch -Patch106: 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch -Patch107: 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch -Patch108: 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch -Patch109: 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch -Patch110: 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch -Patch111: 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch -Patch112: 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch -Patch113: 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch -Patch114: 
0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch -Patch115: 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch -Patch116: 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch -Patch117: 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch -Patch118: 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch -Patch119: 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch -Patch120: 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch -Patch121: 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch -Patch122: 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch -Patch123: 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch -Patch124: 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch -Patch125: 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch -Patch126: 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch -Patch127: 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch -Patch128: 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch -Patch129: 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch -Patch130: 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch -Patch131: 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch -Patch132: 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch -Patch133: 0133-Backport-SME-mode-switching-Add-note-problem.patch -Patch134: 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch -Patch135: 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch -Patch136: 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch -Patch137: 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch -Patch138: 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch -Patch139: 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch -Patch140: 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch 
-Patch141: 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch -Patch142: 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch -Patch143: 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch -Patch144: 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch -Patch145: 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch -Patch146: 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch -Patch147: 0147-Backport-SME-function-Change-return-type-of-predicat.patch -Patch148: 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch -Patch149: 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch -Patch150: 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch -Patch151: 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch -Patch152: 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch -Patch153: 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch -Patch154: 0154-Backport-SME-recog-Support-space-in-cons.patch -Patch155: 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch -Patch156: 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch -Patch157: 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch -Patch158: 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch -Patch159: 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch -Patch160: 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch -Patch161: 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch -Patch162: 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch -Patch163: 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch -Patch164: 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch -Patch165: 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch -Patch166: 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch -Patch167: 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch -Patch168: 
0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch -Patch169: 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch -Patch170: 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch -Patch171: 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch -Patch172: 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch -Patch173: 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch -Patch174: 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch -Patch175: 0175-Backport-SME-aarch64-Add-sme.patch -Patch176: 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch -Patch177: 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch -Patch178: 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch -Patch179: 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch -Patch180: 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch -Patch181: 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch -Patch182: 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch -Patch183: 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch -Patch184: 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch -Patch185: 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch -Patch186: 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch -Patch187: 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch -Patch188: 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch -Patch189: 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch -Patch190: 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch -Patch191: 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch -Patch192: 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch -Patch193: 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch -Patch194: 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch -Patch195: 
0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch -Patch196: 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch -Patch197: 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch -Patch198: 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch -Patch199: 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch -Patch200: 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch -Patch201: 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch -Patch202: 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch -Patch203: 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch -Patch204: 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch -Patch205: 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch -Patch206: 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch -Patch207: 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch -Patch208: 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch -Patch209: 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch -Patch210: 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch -Patch211: 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch -Patch212: 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch -Patch213: 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch -Patch214: 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch -Patch215: 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch -Patch216: 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch -Patch217: 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch -Patch218: 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch -Patch219: 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch -Patch220: 0220-Backport-SME-libgcc-Fix-config.in.patch -Patch221: 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch -Patch222: 
0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch -Patch223: 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch -Patch224: 0224-Backport-SME-aarch64-Add-V1DI-mode.patch -Patch225: 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch -Patch226: 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch -Patch227: 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch -Patch228: 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch -Patch229: 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch -Patch230: 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch -Patch231: 0231-SME-Add-missing-header-file-in-aarch64.cc.patch -Patch232: 0232-Backport-SME-c-Add-support-for-__extension__.patch -Patch233: 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch -Patch234: 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch -Patch235: 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch -Patch236: 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch -Patch237: 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch -Patch238: 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch -Patch239: 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch -Patch240: 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch -Patch241: 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch -Patch242: 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch -Patch243: 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch -Patch244: 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch -Patch245: 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch -Patch246: 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch -Patch247: 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch -Patch248: 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch -Patch249: 
0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch -Patch250: 0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch -Patch251: 0251-SME-Adapt-some-testsuites.patch -Patch252: 0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch -Patch253: 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch -Patch254: 0254-aarch64-Fix-loose-ldpstp-check.patch + +Patch103: 0103-aarch64-Use-local-frame-vars-in-shrink-wrapping-code.patch +Patch104: 0104-aarch64-Avoid-a-use-of-callee-offset.patch +Patch105: 0105-aarch64-Explicitly-handle-frames-with-no-saved-registers.patch +Patch106: 0106-aarch64-Add-bytes-below-saved-regs-to-frame-info.patch +Patch107: 0107-aarch64-Add-bytes-below-hard-fp-to-frame-info.patch +Patch108: 0108-aarch64-Tweak-aarch64-save-restore-callee-saves.patch +Patch109: 0109-aarch64-Only-calculate-chain-offset-if-there-is-a-chain.patch +Patch110: 0110-aarch64-Rename-locals-offset-to-bytes-above-locals.patch +Patch111: 0111-aarch64-Rename-hard-fp-offset-to-bytes-above-hard-fp.patch +Patch112: 0112-aarch64-Tweak-frame-size-comment.patch +Patch113: 0113-aarch64-Measure-reg-offset-from-the-bottom-of-the-frame.patch +Patch114: 0114-aarch64-Simplify-top-of-frame-allocation.patch +Patch115: 0115-aarch64-Minor-initial-adjustment-tweak.patch +Patch116: 0116-aarch64-Tweak-stack-clash-boundary-condition.patch +Patch117: 0117-aarch64-Put-LR-save-probe-in-first-16-bytes.patch +Patch118: 0118-aarch64-Simplify-probe-of-final-frame-allocation.patch +Patch119: 0119-aarch64-Explicitly-record-probe-registers-in-frame-info.patch +Patch120: 0120-aarch64-Remove-below-hard-fp-saved-regs-size.patch +Patch121: 0121-aarch64-Make-stack-smash-canary-protect-saved-registers.patch +Patch122: 0122-aarch64-Fix-return-register-handling-in-untyped_call.patch +Patch123: 0123-aarch64-Fix-loose-ldpstp-check.patch + Patch255: 0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch Patch256: 
0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch Patch257: 0257-Make-option-mvzeroupper-independent-of-optimization-.patch @@ -390,7 +261,7 @@ Patch281: 0281-x86-Update-model-values-for-Raptorlake.patch Patch282: 0282-Fix-target_clone-arch-graniterapids-d.patch Patch283: 0283-i386-Change-prefetchi-output-template.patch Patch284: 0284-i386-Add-non-optimize-prefetchi-intrins.patch -Patch285: 0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch + Patch286: 0286-Try-to-use-AI-model-to-guide-optimization.patch Patch287: 0287-Add-dynamic-memory-access-checks.patch Patch288: 0288-Enable-macro-use-commandline.patch @@ -1320,6 +1191,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P100 -p1 %patch -P101 -p1 %patch -P102 -p1 + %patch -P103 -p1 %patch -P104 -p1 %patch -P105 -p1 @@ -1341,137 +1213,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P121 -p1 %patch -P122 -p1 %patch -P123 -p1 -%patch -P124 -p1 -%patch -P125 -p1 -%patch -P126 -p1 -%patch -P127 -p1 -%patch -P128 -p1 -%patch -P129 -p1 -%patch -P130 -p1 -%patch -P131 -p1 -%patch -P132 -p1 -%patch -P133 -p1 -%patch -P134 -p1 -%patch -P135 -p1 -%patch -P136 -p1 -%patch -P137 -p1 -%patch -P138 -p1 -%patch -P139 -p1 -%patch -P140 -p1 -%patch -P141 -p1 -%patch -P142 -p1 -%patch -P143 -p1 -%patch -P144 -p1 -%patch -P145 -p1 -%patch -P146 -p1 -%patch -P147 -p1 -%patch -P148 -p1 -%patch -P149 -p1 -%patch -P150 -p1 -%patch -P151 -p1 -%patch -P152 -p1 -%patch -P153 -p1 -%patch -P154 -p1 -%patch -P155 -p1 -%patch -P156 -p1 -%patch -P157 -p1 -%patch -P158 -p1 -%patch -P159 -p1 -%patch -P160 -p1 -%patch -P161 -p1 -%patch -P162 -p1 -%patch -P163 -p1 -%patch -P164 -p1 -%patch -P165 -p1 -%patch -P166 -p1 -%patch -P167 -p1 -%patch -P168 -p1 -%patch -P169 -p1 -%patch -P170 -p1 -%patch -P171 -p1 -%patch -P172 -p1 -%patch -P173 -p1 -%patch -P174 -p1 -%patch -P175 -p1 -%patch -P176 -p1 -%patch -P177 -p1 -%patch -P178 -p1 -%patch -P179 -p1 -%patch -P180 -p1 -%patch 
-P181 -p1 -%patch -P182 -p1 -%patch -P183 -p1 -%patch -P184 -p1 -%patch -P185 -p1 -%patch -P186 -p1 -%patch -P187 -p1 -%patch -P188 -p1 -%patch -P189 -p1 -%patch -P190 -p1 -%patch -P191 -p1 -%patch -P192 -p1 -%patch -P193 -p1 -%patch -P194 -p1 -%patch -P195 -p1 -%patch -P196 -p1 -%patch -P197 -p1 -%patch -P198 -p1 -%patch -P199 -p1 -%patch -P200 -p1 -%patch -P201 -p1 -%patch -P202 -p1 -%patch -P203 -p1 -%patch -P204 -p1 -%patch -P205 -p1 -%patch -P206 -p1 -%patch -P207 -p1 -%patch -P208 -p1 -%patch -P209 -p1 -%patch -P210 -p1 -%patch -P211 -p1 -%patch -P212 -p1 -%patch -P213 -p1 -%patch -P214 -p1 -%patch -P215 -p1 -%patch -P216 -p1 -%patch -P217 -p1 -%patch -P218 -p1 -%patch -P219 -p1 -%patch -P220 -p1 -%patch -P221 -p1 -%patch -P222 -p1 -%patch -P223 -p1 -%patch -P224 -p1 -%patch -P225 -p1 -%patch -P226 -p1 -%patch -P227 -p1 -%patch -P228 -p1 -%patch -P229 -p1 -%patch -P230 -p1 -%patch -P231 -p1 -%patch -P232 -p1 -%patch -P233 -p1 -%patch -P234 -p1 -%patch -P235 -p1 -%patch -P236 -p1 -%patch -P237 -p1 -%patch -P238 -p1 -%patch -P239 -p1 -%patch -P240 -p1 -%patch -P241 -p1 -%patch -P242 -p1 -%patch -P243 -p1 -%patch -P244 -p1 -%patch -P245 -p1 -%patch -P246 -p1 -%patch -P247 -p1 -%patch -P248 -p1 -%patch -P249 -p1 -%patch -P250 -p1 -%patch -P251 -p1 -%patch -P252 -p1 -%patch -P253 -p1 -%patch -P254 -p1 + %patch -P255 -p1 %patch -P256 -p1 %patch -P257 -p1 @@ -1502,7 +1244,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. 
%patch -P282 -p1 %patch -P283 -p1 %patch -P284 -p1 -%patch -P285 -p1 + %patch -P286 -p1 %patch -P287 -p1 %patch -P288 -p1 @@ -3339,7 +3081,6 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_fp16.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_bf16.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_sve.h -%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/arm_sme.h %endif %ifarch loongarch64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/larchintrin.h @@ -4167,6 +3908,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Dec 11 2024 eastb233 - 12.3.1-60 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:Revert SME because of performance issue + * Wed Dec 11 2024 Zhenyu Zhao - 12.3.1-59 - Type:Sync - ID:NA -- Gitee